The Wayback Machine - https://web.archive.org/web/20201202135818/https://github.com/qzchenwl/hiveql-parser
Skip to content
master
Go to file
Code

Latest commit

 

Git stats

Files

Permalink
Failed to load latest commit information.
Type
Name
Latest commit message
Commit time
src
 
 
 
 
 
 

README.md

hiveql-parser

HiveQL Parser. Parses HiveQL code and prints the AST in JSON format on success (exit 0); otherwise prints a well-formed syntax error message (exit 1).

Build

$ mvn package

To build a standalone jar, use:

$ mvn clean compile assembly:single

Run

$ java -jar /path/to/hiveql-parser.jar /path/to/your-code.sql

Examples

$ java -jar hiveql-parser.jar <(echo "select count(*) as count, myfield from &0rz") 2>/dev/null
[1,39]: line 1:39 cannot recognize input near '&' '0rz' '<EOF>' in join source
$ java -jar hiveql-parser.jar <(echo "select count(*) as count, myfield from 0rz") 2>/dev/null | jq .
{
  "startIndex": 0,
  "stopIndex": 18,
  "childIndex": -1,
  "children": [
    {
      "startIndex": 0,
      "stopIndex": 16,
      "token": {
        "type": 860,
        "line": 0,
        "charPositionInLine": -1,
        "channel": 0,
        "text": "TOK_QUERY",
        "index": -1,
        "start": 0,
        "stop": 0
      },
      "childIndex": 0,
      "children": [
        {
          "startIndex": 14,
          "stopIndex": 16,
          "token": {
            "type": 748,
            "line": 0,
            "charPositionInLine": -1,
            "channel": 0,
            "text": "TOK_FROM",
            "index": -1,
            "start": 0,
            "stop": 0
          },
          "childIndex": 0,
          "children": [
            {
              "startIndex": 16,
              "stopIndex": 16,
              "token": {
                "type": 954,
                "line": 0,
                "charPositionInLine": -1,
                "channel": 0,
                "text": "TOK_TABREF",
                "index": -1,
                "start": 0,
                "stop": 0
              },
              "childIndex": 0,
              "children": [
                {
                  "startIndex": 16,
                  "stopIndex": 16,
                  "token": {
                    "type": 953,
                    "line": 0,
                    "charPositionInLine": -1,
                    "channel": 0,
                    "text": "TOK_TABNAME",
                    "index": -1,
                    "start": 0,
                    "stop": 0
                  },
                  "childIndex": 0,
                  "children": [
                    {
                      "startIndex": 16,
                      "stopIndex": 16,
                      "token": {
                        "type": 26,
                        "line": 1,
                        "charPositionInLine": 39,
                        "channel": 0,
                        "index": 16,
                        "start": 39,
                        "stop": 41
                      },
                      "childIndex": 0,
                      "children": [],
                      "toString": "0rz",
                      "toStringTree": "0rz"
                    }
                  ],
                  "toString": "TOK_TABNAME",
                  "toStringTree": "(tok_tabname 0rz)"
                }
              ],
              "toString": "TOK_TABREF",
              "toStringTree": "(tok_tabref (tok_tabname 0rz))"
            }
          ],
          "toString": "TOK_FROM",
          "toStringTree": "(tok_from (tok_tabref (tok_tabname 0rz)))"
        },
        {
          "startIndex": -1,
          "stopIndex": 12,
          "token": {
            "type": 772,
            "line": 0,
            "charPositionInLine": -1,
            "channel": 0,
            "text": "TOK_INSERT",
            "index": -1,
            "start": 0,
            "stop": 0
          },
          "childIndex": 1,
          "children": [
            {
              "startIndex": -1,
              "stopIndex": -1,
              "token": {
                "type": 726,
                "line": 0,
                "charPositionInLine": -1,
                "channel": 0,
                "text": "TOK_DESTINATION",
                "index": -1,
                "start": 0,
                "stop": 0
              },
              "childIndex": 0,
              "children": [
                {
                  "startIndex": -1,
                  "stopIndex": -1,
                  "token": {
                    "type": 727,
                    "line": 0,
                    "charPositionInLine": -1,
                    "channel": 0,
                    "text": "TOK_DIR",
                    "index": -1,
                    "start": 0,
                    "stop": 0
                  },
                  "childIndex": 0,
                  "children": [
                    {
                      "startIndex": -1,
                      "stopIndex": -1,
                      "token": {
                        "type": 963,
                        "line": 0,
                        "charPositionInLine": -1,
                        "channel": 0,
                        "text": "TOK_TMP_FILE",
                        "index": -1,
                        "start": 0,
                        "stop": 0
                      },
                      "childIndex": 0,
                      "children": [],
                      "toString": "TOK_TMP_FILE",
                      "toStringTree": "tok_tmp_file"
                    }
                  ],
                  "toString": "TOK_DIR",
                  "toStringTree": "(tok_dir tok_tmp_file)"
                }
              ],
              "toString": "TOK_DESTINATION",
              "toStringTree": "(tok_destination (tok_dir tok_tmp_file))"
            },
            {
              "startIndex": 0,
              "stopIndex": 12,
              "token": {
                "type": 878,
                "line": 0,
                "charPositionInLine": -1,
                "channel": 0,
                "text": "TOK_SELECT",
                "index": -1,
                "start": 0,
                "stop": 0
              },
              "childIndex": 1,
              "children": [
                {
                  "startIndex": 2,
                  "stopIndex": 9,
                  "token": {
                    "type": 880,
                    "line": 0,
                    "charPositionInLine": -1,
                    "channel": 0,
                    "text": "TOK_SELEXPR",
                    "index": -1,
                    "start": 0,
                    "stop": 0
                  },
                  "childIndex": 0,
                  "children": [
                    {
                      "startIndex": 2,
                      "stopIndex": 5,
                      "token": {
                        "type": 752,
                        "line": 0,
                        "charPositionInLine": -1,
                        "channel": 0,
                        "text": "TOK_FUNCTIONSTAR",
                        "index": -1,
                        "start": 0,
                        "stop": 0
                      },
                      "childIndex": 0,
                      "children": [
                        {
                          "startIndex": 2,
                          "stopIndex": 2,
                          "token": {
                            "type": 26,
                            "line": 1,
                            "charPositionInLine": 7,
                            "channel": 0,
                            "index": 2,
                            "start": 7,
                            "stop": 11
                          },
                          "childIndex": 0,
                          "children": [],
                          "toString": "count",
                          "toStringTree": "count"
                        }
                      ],
                      "toString": "TOK_FUNCTIONSTAR",
                      "toStringTree": "(tok_functionstar count)"
                    },
                    {
                      "startIndex": 9,
                      "stopIndex": 9,
                      "token": {
                        "type": 26,
                        "line": 1,
                        "charPositionInLine": 19,
                        "channel": 0,
                        "index": 9,
                        "start": 19,
                        "stop": 23
                      },
                      "childIndex": 1,
                      "children": [],
                      "toString": "count",
                      "toStringTree": "count"
                    }
                  ],
                  "toString": "TOK_SELEXPR",
                  "toStringTree": "(tok_selexpr (tok_functionstar count) count)"
                },
                {
                  "startIndex": 12,
                  "stopIndex": 12,
                  "token": {
                    "type": 880,
                    "line": 0,
                    "charPositionInLine": -1,
                    "channel": 0,
                    "text": "TOK_SELEXPR",
                    "index": -1,
                    "start": 0,
                    "stop": 0
                  },
                  "childIndex": 1,
                  "children": [
                    {
                      "startIndex": 12,
                      "stopIndex": 12,
                      "token": {
                        "type": 950,
                        "line": 0,
                        "charPositionInLine": -1,
                        "channel": 0,
                        "text": "TOK_TABLE_OR_COL",
                        "index": -1,
                        "start": 0,
                        "stop": 0
                      },
                      "childIndex": 0,
                      "children": [
                        {
                          "startIndex": 12,
                          "stopIndex": 12,
                          "token": {
                            "type": 26,
                            "line": 1,
                            "charPositionInLine": 26,
                            "channel": 0,
                            "index": 12,
                            "start": 26,
                            "stop": 32
                          },
                          "childIndex": 0,
                          "children": [],
                          "toString": "myfield",
                          "toStringTree": "myfield"
                        }
                      ],
                      "toString": "TOK_TABLE_OR_COL",
                      "toStringTree": "(tok_table_or_col myfield)"
                    }
                  ],
                  "toString": "TOK_SELEXPR",
                  "toStringTree": "(tok_selexpr (tok_table_or_col myfield))"
                }
              ],
              "toString": "TOK_SELECT",
              "toStringTree": "(tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield)))"
            }
          ],
          "toString": "TOK_INSERT",
          "toStringTree": "(tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield))))"
        }
      ],
      "toString": "TOK_QUERY",
      "toStringTree": "(tok_query (tok_from (tok_tabref (tok_tabname 0rz))) (tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield)))))"
    },
    {
      "startIndex": 18,
      "stopIndex": 18,
      "token": {
        "type": -1,
        "line": 2,
        "charPositionInLine": 0,
        "channel": 0,
        "index": 18,
        "start": 43,
        "stop": 43
      },
      "childIndex": 1,
      "children": [],
      "toString": "<EOF>",
      "toStringTree": "<eof>"
    }
  ],
  "toString": "nil",
  "toStringTree": "(tok_query (tok_from (tok_tabref (tok_tabname 0rz))) (tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield))))) <eof>"
}

About

HiveQL Parser. Parses HiveQL code and prints the AST in JSON format on success; otherwise prints a well-formed syntax error message.

Topics

Resources

Packages

No packages published

Languages

You can’t perform that action at this time.