/**
 * Exported class that currently declares no fields or methods.
 * The CSV parsing in this file is provided by the `parseCsv` function.
 */
export class CsvParser {}

/**
 * A lexical token: the text it covers plus a symbol identifying its kind.
 *
 * `symbol` is typed as the primitive `symbol` (not the boxed `Symbol`
 * wrapper interface), which is what `Symbol(...)` actually returns.
 */
class Token {
    /**
     * @param value the text this token stands for
     * @param symbol the tag identifying what kind of token this is
     */
    constructor(readonly value: string, readonly symbol: symbol) {
    }
}


// Single characters that the tokenizer turns into tokens of their own
const LF = "\x0A"
const CR = "\x0D"
const DQUOTE = "\x22"
const COMMA = "\x2C"
// A leading run of one or more characters that are neither control characters,
// double quotes (\x22) nor commas (\x2C). CR and LF are control characters, so
// they are excluded as well.
const TEXT = /^[^\p{Control}\x22\x2C]+/u
// The Unicode class Control covers \x00-\x1F and \x7F-\x9F

// One unique symbol per kind of token. The string passed to Symbol() becomes
// the symbol's description, which the parser prints in its error messages.
const SYMBOLS = {
    comma: Symbol('comma'),
    cr: Symbol('cr'),
    lf: Symbol('lf'),
    dquote: Symbol('dquote'),
    eoi: Symbol('endOfInput'),
    text: Symbol('text')
}

// Shared token instances for the single-character tokens and for end of input.
// The parser compares tokens against these instances by identity (===).
const comma = new Token(COMMA, SYMBOLS.comma)
const cr = new Token(CR, SYMBOLS.cr)
const lf = new Token(LF, SYMBOLS.lf)
const dquote = new Token(DQUOTE, SYMBOLS.dquote)
// End of input has no text of its own; 'EndOfInput' is a placeholder value
const eoi = new Token('EndOfInput', SYMBOLS.eoi)


/** One CSV record as an object: field values keyed by string. */
type CsvObject = Record<string, string>
/** A list of CSV records, each record being a list of field values. */
type StringArrayArray = Array<Array<string>>

/**
 * Parses a string as CSV (@see https://datatracker.ietf.org/doc/html/rfc4180)
 *
 * The input is split into tokens (comma, CR, LF, double quote, text) and read
 * with one token of lookahead. Fields may be quoted; inside quotes, commas and
 * line breaks are literal and a double quote is written as two double quotes.
 * All records must have the same number of fields. A line break after the last
 * line is optional.
 *
 * @param input the complete CSV text
 * @param args optional settings:
 *   - `eol`: the line terminator between records. CRLF is expected unless
 *     `"\n"` is given.
 *   - `header`: when true, the first record is read as a list of unique column
 *     names and every following record is returned as an object keyed by
 *     those names. Zero data records are allowed in that case. Otherwise all
 *     records are returned as arrays of field values.
 * @returns an array of objects when `header` is true, else an array of string arrays
 * @throws Error when the input contains characters that are not allowed, a
 *   quoted field is not terminated, a line terminator is malformed, records
 *   differ in their number of fields, or header names are not unique
 */
export function parseCsv(input: string, args: { eol?: "\n" | "\r\n", header: true }): CsvObject[]
export function parseCsv(input: string, args?: { eol?: "\n" | "\r\n", header?: false }): StringArrayArray
export function parseCsv(input: string, args: {
    eol?: "\n" | "\r\n",
    header: boolean
}): CsvObject[] | StringArrayArray
export function parseCsv(input: string, args?: {
    eol?: "\n" | "\r\n",
    header?: boolean
}): CsvObject[] | StringArrayArray {
    // Position of the next unread character in input
    let idx = 0
    const end = input.length
    // True when records are separated by a bare LF; in every other case CRLF is expected
    const lfOnly = args?.eol === lf.value
    // One token of lookahead; reading it eagerly means illegal characters are reported as soon as they are reached
    let nextToken = readNextToken()
    // Number of fields of the first record read; every later record must match it
    let nrFields: number | null = null

    /** Aborts parsing by throwing an Error with the given message. */
    function bail(msg: string): never {
        throw new Error(msg)
    }

    /** Returns the value unchanged, or aborts if it is null or undefined. */
    function expect<T>(input: T | null | undefined): T {
        if (input === null || input === undefined) bail('expected input')
        return input
    }

    /** Tells whether the given token is the first token of a line terminator. */
    function isStartOfEol(peek: Token) {
        return peek === (lfOnly ? lf : cr)
    }

    /** Consumes one line terminator (LF, or CR followed by LF), aborting on anything else. */
    function readEOL() {
        if (lfOnly) {
            readToken(lf)
        } else {
            readToken(cr)
            readToken(lf)
        }
    }

    /** Scans the token that starts at idx and advances idx past it. */
    function readNextToken(): Token {
        if (idx >= end) {
            return eoi
        }
        let res: Token | undefined
        // idx < end here, so charAt always yields exactly one UTF-16 code unit
        switch (input.charAt(idx)) {
            case CR:
                res = cr
                break
            case LF:
                res = lf
                break
            case DQUOTE:
                res = dquote
                break
            case COMMA:
                res = comma
                break
        }
        if (res) {
            ++idx
            return res
        }
        // Anything else must be a run of text; TEXT is anchored to the start of the remaining input
        const match = TEXT.exec(input.substring(idx))
        if (match === null) bail(`Unexpected input at '${input.substring(idx, idx + 10)}'`)
        res = new Token(expect(match[0]), SYMBOLS.text)
        idx += res.value.length
        return res
    }

    /** Consumes and returns the lookahead token, then scans the one after it. */
    function getNextToken() {
        const ret = nextToken
        nextToken = readNextToken()
        return ret
    }

    /** Returns the lookahead token without consuming it. */
    function peekNextToken() {
        return nextToken
    }

    /** Reads an unquoted field: a single text token, or an empty field if none follows. */
    function readNonEscaped() {
        if (peekNextToken().symbol === SYMBOLS.text) {
            return getNextToken()
        }
        return new Token('', SYMBOLS.text)
    }

    /** Consumes the next token and aborts unless it is of the same kind as `what`. */
    function readToken(what: Token) {
        const actual = getNextToken()
        if (what.symbol !== actual.symbol) {
            bail(`Expected ${what.symbol.description}, got ${actual.symbol.description}`)
        }
    }

    /** Reads a quoted field and returns its unescaped content as a text token. */
    function readEscaped() {
        readToken(dquote)
        let n = getNextToken()
        let res = ''
        while (true) {
            if (n === dquote) {
                if (peekNextToken() !== dquote) {
                    // Found the end of the quoted string, return what we have
                    return new Token(res, SYMBOLS.text)
                }
                // Two double quotes in a row are an escaped double quote: put a
                // single one in the result (below) and eat the second one.
                readToken(dquote)
            }
            // Only end of input gets rejected here, i.e. the closing quote is missing
            if (!(n === dquote || n === comma || n === cr || n === lf || n.symbol === SYMBOLS.text)) {
                bail(`Unexpected input ${n.value}`)
            }
            res += n.value
            n = getNextToken()
        }
    }

    /** Reads one field, quoted or not, and returns its value. */
    function readField() {
        return peekNextToken() === dquote ? readEscaped().value : readNonEscaped().value
    }

    /** Reads one record (comma-separated fields) and checks its field count against the first record. */
    function readRecord() {
        const res = [readField()]
        while (peekNextToken() === comma) {
            readToken(comma)
            res.push(readField())
        }
        // A record always has at least one field, so nrFields is never 0 once set
        if (!nrFields) {
            nrFields = res.length
        } else if (nrFields !== res.length) {
            bail('Inconsistent number of fields in input')
        }
        return res
    }

    /** Reads the header record and aborts if a column name occurs more than once. */
    function readHeader() {
        const headers = readRecord()
        const nrUnique = new Set(headers).size
        if (nrUnique !== headers.length) {
            bail('Headers not unique')
        }
        return headers
    }

    /** Turns every row into an object that maps each header name to the row's field at the same position. */
    function makeObject(header: string[], res: StringArrayArray): CsvObject[] {
        return res.map(row =>
            // Object.fromEntries defines own properties, so a column named
            // "__proto__" is kept instead of being swallowed by the prototype setter.
            // readRecord guarantees row and header have equal length; the fallback is never used.
            Object.fromEntries(header.map((key, i): [string, string] => [key, row[i] ?? '']))
        )
    }

    /** Reads the whole input: an optional header line followed by the records. */
    function readFile() {
        const header = args?.header ? readHeader() : null

        // If there is a header, we allow 0 rows of input. In that case the header is
        // followed by EOI, EOL EOI or EOL EOL EOI.
        if (header) {
            // The line break after the last line is optional, also when that line is the header
            if (peekNextToken() === eoi) {
                return []
            }
            readEOL()
            const peek = peekNextToken()
            if (peek === eoi) {
                return []
            } else if (isStartOfEol(peek)) {
                readEOL()
                readToken(eoi)
                return []
            }
        }
        // Otherwise, there must be at least 1 row.
        const res = [readRecord()]
        while (true) {
            // may be the end, then finish
            if (peekNextToken() === eoi) {
                break
            }
            // anything else must be a line terminator
            readEOL()
            // may be the end (the input finished with a line terminator)
            if (peekNextToken() === eoi) {
                break
            }
            // else read another record and go again
            res.push(readRecord())
        }

        return header ? makeObject(header, res) : res
    }

    return readFile()
}
