Skip to content

lark.exceptions.UnexpectedToken: Unexpected token Token('RULE', 'command') #1523

Open
@lemon-little

Description

@lemon-little
from lark import Lark, Transformer
from typing import Dict, List, Any, Optional

# 定义华为配置文件的语法规则
GRAMMAR = """
    // 主规则:配置文件由多个命令组成
    start: command*
    
    // 命令可以是简单命令或块命令
    command: simple_command | interface_command | aaa_command | dhcp_command | vty_command
    
    // 简单命令:命令名 + 参数 + 换行
    simple_command: WORD argument* NEWLINE
    
    // 接口命令块
    interface_command: "interface" (PHYSICAL_INTERFACE | VLAN_INTERFACE) NEWLINE
                     INDENT command* DEDENT
                     "quit" NEWLINE
    
    // AAA命令块
    aaa_command: "aaa" NEWLINE
                INDENT command* DEDENT
                "quit" NEWLINE
    
    // DHCP命令块
    dhcp_command: "dhcp" WORD NEWLINE
                 INDENT command* DEDENT
                 "quit" NEWLINE
    
    // VTY命令块
    vty_command: "user-interface" WORD NEWLINE
                INDENT command* DEDENT
                "quit" NEWLINE
    
    // 参数可以是以下类型
    argument: WORD | IP_ADDRESS | NUMBER | STRING | PHYSICAL_INTERFACE | VLAN_INTERFACE | VLAN_LIST
    
    // 注释以#或!开头
    COMMENT: /^[#!].*$/
    
    // IP地址格式
    IP_ADDRESS: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    
    // 数字
    NUMBER: /\d+/
    
    // 字符串(用双引号包围)
    STRING: /"[^"]*"/
    
    // 物理接口名称(如:gei1/0/1)
    PHYSICAL_INTERFACE: /[a-zA-Z][a-zA-Z0-9_-]*\/\d+\/\d+/
    
    // VLAN接口(如:vlan 100)
    VLAN_INTERFACE: /vlan\s+\d+/
    
    // VLAN列表(如:100 138)
    VLAN_LIST: /\d+(\s+\d+)*/
    
    // 命令名(以字母开头,可包含字母、数字、下划线和连字符)
    WORD: /[a-zA-Z][a-zA-Z0-9_-]*/
    
    // 导入Lark通用规则
    %import common.WS
    %import common.NEWLINE
    %import common.INDENT
    %import common.DEDENT
    
    // 忽略空白字符和注释
    %ignore WS
    %ignore COMMENT
"""

class HuaweiTransformer(Transformer):
    """转换器类,将语法树转换为结构化数据"""
    
    def start(self, items):
        """处理整个配置文件"""
        return items
    
    def command(self, items):
        """处理单个命令"""
        return items[0]
    
    def simple_command(self, items):
        """处理简单命令"""
        return {
            'type': 'simple',
            'command': str(items[0]),
            'arguments': [str(arg) for arg in items[1:-1]]  # 排除换行符
        }
    
    def interface_command(self, items):
        """处理接口命令块"""
        return {
            'type': 'interface',
            'name': str(items[1]),
            'commands': items[3] if len(items) > 3 else []
        }
    
    def aaa_command(self, items):
        """处理AAA命令块"""
        return {
            'type': 'aaa',
            'commands': items[2] if len(items) > 2 else []
        }
    
    def dhcp_command(self, items):
        """处理DHCP命令块"""
        return {
            'type': 'dhcp',
            'name': str(items[1]),
            'commands': items[3] if len(items) > 3 else []
        }
    
    def vty_command(self, items):
        """处理VTY命令块"""
        return {
            'type': 'vty',
            'name': str(items[1]),
            'commands': items[3] if len(items) > 3 else []
        }
    
    def argument(self, items):
        """处理参数"""
        return items[0]

class HuaweiConfigParser:
    """华为配置文件解析器"""
    
    def __init__(self):
        """初始化解析器"""
        self.parser = Lark(GRAMMAR, parser='lalr')
        self.transformer = HuaweiTransformer()
    
    def parse(self, config_text: str) -> List[Dict[str, Any]]:
        """
        解析华为配置文件文本
        
        Args:
            config_text: 配置文件文本内容
            
        Returns:
            解析后的配置列表
        """
        tree = self.parser.parse(config_text)
        return self.transformer.transform(tree)

def parse_file(file_path: str) -> List[Dict[str, Any]]:
    """
    解析华为配置文件
    
    Args:
        file_path: 配置文件路径
        
    Returns:
        解析后的配置列表
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        config_text = f.read()
    
    parser = HuaweiConfigParser()
    return parser.parse(config_text)

if __name__ == '__main__':
    # 示例用法
    sample_config = """system-view
vlan batch 100 138
sysname SW01-CS06
#
interface vlan 100
    ip address 192.168.0.6 255.255.255.0
quit
#
interface gei1/0/1
    description Server-31-Mgmt
    port link-type access
    port default vlan 138
    undo shutdown
    stp edged-port enable
quit
    """
    
    parser = HuaweiConfigParser()
    result = parser.parse(sample_config)
    print("解析结果:")
    for item in result:
        print(item) 

Bug Report

lark.exceptions.UnexpectedToken: Unexpected token Token('RULE', 'command') at line 13, column 29.
Expected one of:
        * _DOT
        * _COLON

How to resolve this question? I'd be grateful if someone could take a look at this.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions