我正在尝试为if-else类型结构创建一个非常简单的解析器,它将构建并执行sql语句.
我会测试构建字符串的条件,而不是测试执行语句的条件.
一个例子是:
select column1 from #if(VariableA = Case1) table1 #else if(VariableA = Case2) table2 #else defaultTable #end
如果VariableA等于Case1,则结果字符串应为:select column1 from table1
一个更复杂的例子是嵌套的if语句:
select column1 from #if(VariableA = Case1) #if(VariableB = Case3) table3 #else table4 #else if(VariableA = Case2) table2 #else defaultTable #end
这是我真正遇到麻烦的地方,我想不出一个正确识别每个if-else-end组的好方法.
另外,我也不确定有什么好方法可以跟踪“else”子句中的字符串是否应当被求值为真(即是否应当输出).
我一直在网上寻找不同类型的解析算法,所有这些算法看起来都非常抽象和复杂.
对于我这个非计算机科学专业的人,有什么建议吗?
解决方法
我写了一个简单的解析器,我根据你提供的例子进行了测试.如果你想了解更多关于解析的信息,我建议你阅读Niklaus Wirth的
Compiler Construction.
第一步始终是以适当的方式记下语言的语法.我选择了EBNF,这很容易理解.
| 用于分隔各个备选项.
[ 和 ] 括起可选部分.
{ 和 } 表示重复(零次,一次或多次).
( 和 ) 用于对表达式分组(此处未使用).
此描述并不完整,但我提供的链接更详细地描述了它.
EBNF语法
LineSequence = { TextLine | IfStatement }.
TextLine = <string>.
IfStatement = IfLine LineSequence { ElseIfLine LineSequence } [ ElseLine LineSequence ] EndLine.
IfLine = "#if" "(" Condition ")".
ElseLine = "#else".
ElseIfLine = "#else" "if" "(" Condition ")".
EndLine = "#end".
Condition = Identifier "=" Identifier.
Identifier = <letter_or_underline> { <letter_or_underline> | <digit> }.
解析器严格遵循语法:重复被转换为循环,备选项被转换为if-else语句,依此类推.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace Example.sqlPreprocessor
{
    /// <summary>
    /// Recursive-descent parser for sql text that contains line-based
    /// preprocessor directives (<c>#if(...)</c>, <c>#else if(...)</c>,
    /// <c>#else</c>, <c>#end</c>). Text lines belonging to the branches whose
    /// conditions hold are written to the output; all other text lines are skipped.
    /// </summary>
    /// <remarks>
    /// Each parsing method corresponds to one production of this EBNF grammar:
    /// <code>
    /// LineSequence = { TextLine | IfStatement }.
    /// TextLine     = &lt;string&gt;.
    /// IfStatement  = IfLine LineSequence { ElseIfLine LineSequence } [ ElseLine LineSequence ] EndLine.
    /// IfLine       = "#if" "(" Condition ")".
    /// ElseLine     = "#else".
    /// ElseIfLine   = "#else" "if" "(" Condition ")".
    /// EndLine      = "#end".
    /// Condition    = Identifier "=" Identifier.
    /// </code>
    /// </remarks>
    class Parser
    {
        enum Symbol
        {
            None,        // End of input, or parsing stopped by an error.
            LPar,        // "("
            RPar,        // ")"
            Equals,      // "="
            Text,        // Ordinary sql text line.
            NumberIf,    // "#if"
            If,          // "if" following "#else"
            NumberElse,  // "#else"
            NumberEnd,   // "#end"
            Identifier   // Variable name or value inside a condition.
        }

        // Splits a directive line at word boundaries and at every character
        // that cannot be part of a directive token. Created once and reused.
        static readonly Regex DirectiveSplitter = new Regex(@"\b|[^#_a-zA-Z0-9()=]");

        List<string> _input;        // Raw sql with preprocessor directives.
        int _currentLineIndex = 0;

        // Simulates variables used in conditions.
        readonly Dictionary<string, string> _variableValues = new Dictionary<string, string>
        {
            { "VariableA", "Case1" },
            { "VariableB", "CaseX" }
        };

        Symbol _sy;                 // Current symbol.
        string _string;             // Identifier or text line.
        readonly Queue<string> _textQueue = new Queue<string>(); // Buffered text parts of a single line.
        int _lineNo;                // Current line number for error messages.
        string _line;               // Current line for error messages.
        bool _hasError;             // Set by Error(); lets Parse detect errors raised inside GetSy.

        /// <summary>
        /// Get the next line from the input.
        /// </summary>
        /// <returns>Input line or null if no more lines are available.</returns>
        string GetLine()
        {
            if (_currentLineIndex >= _input.Count)
            {
                return null;
            }
            _line = _input[_currentLineIndex++];
            _lineNo = _currentLineIndex; // 1-based, because the index was already advanced.
            return _line;
        }

        /// <summary>
        /// Gets the next symbol from the input stream and stores it in _sy.
        /// For identifiers and text lines the text itself is stored in _string.
        /// </summary>
        void GetSy()
        {
            string s;

            if (_textQueue.Count > 0)
            {
                // Buffered parts of a directive line are available, use one of these.
                s = _textQueue.Dequeue();
                // Invariant lower-casing: keyword matching must not depend on the current culture.
                switch (s.ToLowerInvariant())
                {
                    case "(":
                        _sy = Symbol.LPar;
                        break;
                    case ")":
                        _sy = Symbol.RPar;
                        break;
                    case "=":
                        _sy = Symbol.Equals;
                        break;
                    case "if":
                        _sy = Symbol.If;
                        break;
                    default:
                        _sy = Symbol.Identifier;
                        _string = s;
                        break;
                }
                return;
            }

            // Get next line from input.
            s = GetLine();
            if (s == null)
            {
                _sy = Symbol.None;
                return;
            }

            s = s.Trim(' ', '\t');
            if (!s.StartsWith("#", StringComparison.Ordinal))
            {
                // Ordinary sql text line. Empty lines land here as well
                // (StartsWith is safe on an empty string, unlike s[0]).
                _sy = Symbol.Text;
                _string = s;
                return;
            }

            // We have a preprocessor directive.
            // Split the line in order to be able to get its symbols.
            // parts[0] = #
            // parts[1] = if, else, end
            string[] parts = DirectiveSplitter.Split(s);
            string keyword = parts.Length > 1 ? parts[1] : ""; // A lone "#" yields no keyword part.
            switch (keyword.ToLowerInvariant())
            {
                case "if":
                    _sy = Symbol.NumberIf;
                    break;
                case "else":
                    _sy = Symbol.NumberElse;
                    break;
                case "end":
                    _sy = Symbol.NumberEnd;
                    break;
                default:
                    Error("Invalid symbol #{0}", keyword);
                    return; // Do not buffer the rest of an invalid line.
            }

            // Store the remaining parts for later.
            for (int i = 2; i < parts.Length; i++)
            {
                string part = parts[i].Trim(' ', '\t');
                if (part != "")
                {
                    _textQueue.Enqueue(part);
                }
            }
        }

        /// <summary>
        /// Reports a parsing error and makes sure parsing stops: the current
        /// symbol is reset, buffered parts and remaining input are discarded.
        /// The message is written to the output and shown in a message box.
        /// </summary>
        /// <param name="message">Composite format string of the error message.</param>
        /// <param name="args">Arguments for the format string.</param>
        void Error(string message, params object[] args)
        {
            _hasError = true;

            // Make sure parsing stops here.
            _sy = Symbol.None;
            _textQueue.Clear();
            _input.Clear();

            message = String.Format(message, args) +
                String.Format(" in line {0}\r\n\r\n{1}", _lineNo, _line);
            Output("------");
            Output(message);
            MessageBox.Show(message, "Error");
        }

        /// <summary>
        /// Writes the processed line to a (simulated) output stream.
        /// </summary>
        /// <param name="line">Line to be written to output</param>
        void Output(string line)
        {
            Console.WriteLine(line);
        }

        /// <summary>
        /// Starts the parsing process on a built-in sample input.
        /// </summary>
        public void Parse()
        {
            // Simulate an input stream.
            Parse(new List<string>
            {
                "select column1",
                "from",
                "#if(VariableA = Case1)",
                " #if(VariableB = Case3)",
                " table3",
                " #else",
                " table4",
                " #end",
                "#else if(VariableA = Case2)",
                " table2",
                "#else",
                " defaultTable",
                "#end"
            });
        }

        /// <summary>
        /// Parses the given lines and writes the resulting sql lines to the output.
        /// </summary>
        /// <param name="lines">Raw sql lines, possibly containing preprocessor directives.</param>
        /// <returns>
        /// true if the whole input was parsed without error; false otherwise
        /// (in which case "*** ABORTED ***" is written to the output).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
        public bool Parse(IEnumerable<string> lines)
        {
            if (lines == null)
            {
                throw new ArgumentNullException("lines");
            }

            // Copy the input: Error() clears _input and must not touch the caller's collection.
            _input = new List<string>(lines);

            // Clear previous parsing.
            _textQueue.Clear();
            _currentLineIndex = 0;
            _lineNo = 0;
            _line = null;
            _hasError = false;

            // Get first symbol and start parsing.
            GetSy();
            bool succeeded = LineSequence(true) && !_hasError;

            if (succeeded && _sy != Symbol.None)
            {
                // LineSequence stops at the first symbol it cannot start with. At top
                // level that is only legal at the end of the input; anything else
                // (a stray #else or #end, trailing tokens after #end) is an error.
                Error("Unexpected symbol, '#if' or sql text expected");
                succeeded = false;
            }

            if (!succeeded)
            {
                // Error encountered.
                Output("*** ABORTED ***");
            }
            return succeeded;
        }

        // The following methods parse according to the EBNF syntax.

        /// <summary>
        /// EBNF: LineSequence = { TextLine | IfStatement }.
        /// </summary>
        /// <param name="writeOutput">Whether text lines of this sequence are written to the output.</param>
        /// <returns>false if an error was encountered; true otherwise.</returns>
        bool LineSequence(bool writeOutput)
        {
            while (_sy == Symbol.Text || _sy == Symbol.NumberIf)
            {
                bool ok = _sy == Symbol.Text
                    ? TextLine(writeOutput)
                    : IfStatement(writeOutput);
                if (!ok)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// EBNF: TextLine = &lt;string&gt;.
        /// </summary>
        /// <param name="writeOutput">Whether the text line is written to the output.</param>
        /// <returns>Always true.</returns>
        bool TextLine(bool writeOutput)
        {
            if (writeOutput)
            {
                Output(_string);
            }
            GetSy();
            return true;
        }

        /// <summary>
        /// EBNF: IfStatement = IfLine LineSequence { ElseIfLine LineSequence } [ ElseLine LineSequence ] EndLine.
        /// </summary>
        /// <param name="writeOutput">Whether the enclosing sequence is being written to the output.</param>
        /// <returns>false if an error was encountered; true otherwise.</returns>
        bool IfStatement(bool writeOutput)
        {
            bool result;

            if (!IfLine(out result) || !LineSequence(writeOutput && result))
            {
                return false;
            }
            writeOutput &= !result; // Only one section can produce an output.

            while (_sy == Symbol.NumberElse)
            {
                GetSy();
                if (_sy != Symbol.If)
                {
                    // We have a simple #else.
                    if (!LineSequence(writeOutput))
                    {
                        return false;
                    }
                    break; // We can have only one #else statement.
                }

                // We have an #else if.
                if (!ElseIfLine(out result) || !LineSequence(writeOutput && result))
                {
                    return false;
                }
                writeOutput &= !result; // Only one section can produce an output.
            }

            if (_sy != Symbol.NumberEnd)
            {
                Error("'#end' expected");
                return false;
            }
            GetSy();
            return true;
        }

        /// <summary>
        /// EBNF: IfLine = "#if" "(" Condition ")".
        /// Called with "#if" as the current symbol.
        /// </summary>
        /// <param name="result">Receives the value of the condition.</param>
        /// <returns>false if an error was encountered; true otherwise.</returns>
        bool IfLine(out bool result)
        {
            return ParenthesizedCondition(out result);
        }

        /// <summary>
        /// EBNF: ElseIfLine = "#else" "if" "(" Condition ")".
        /// "#else" is already processed here; called with "if" as the current symbol.
        /// </summary>
        /// <param name="result">Receives the value of the condition.</param>
        /// <returns>false if an error was encountered; true otherwise.</returns>
        bool ElseIfLine(out bool result)
        {
            return ParenthesizedCondition(out result);
        }

        /// <summary>
        /// Skips the current keyword symbol and parses "(" Condition ")".
        /// </summary>
        /// <param name="result">Receives the value of the condition.</param>
        /// <returns>false if an error was encountered; true otherwise.</returns>
        bool ParenthesizedCondition(out bool result)
        {
            result = false;

            GetSy(); // Skip the keyword ("#if" or "if").
            if (_sy != Symbol.LPar)
            {
                Error("'(' expected");
                return false;
            }

            GetSy();
            if (!Condition(out result))
            {
                return false;
            }

            if (_sy != Symbol.RPar)
            {
                Error("')' expected");
                return false;
            }
            GetSy();
            return true;
        }

        /// <summary>
        /// EBNF: Condition = Identifier "=" Identifier.
        /// The first identifier names a variable, the second one is the value
        /// the variable is compared with.
        /// </summary>
        /// <param name="result">Receives true if the variable has the expected value.</param>
        /// <returns>
        /// false if a syntax error was encountered or the variable is unknown; true otherwise.
        /// </returns>
        private bool Condition(out bool result)
        {
            string variable;
            string expectedValue;
            string variableValue;

            result = false;

            if (_sy != Symbol.Identifier)
            {
                Error("Identifier expected");
                return false;
            }
            variable = _string; // The first identifier is a variable.

            GetSy();
            if (_sy != Symbol.Equals)
            {
                Error("'=' expected");
                return false;
            }

            GetSy();
            if (_sy != Symbol.Identifier)
            {
                Error("Value expected");
                return false;
            }
            expectedValue = _string; // The second identifier is a value.

            // Search the variable.
            if (!_variableValues.TryGetValue(variable, out variableValue))
            {
                Error("Variable '{0}' not found", variable);
                return false;
            }
            result = variableValue == expectedValue; // Perform the comparison.

            GetSy();
            return true;
        }
    }
}
请注意,嵌套的if语句以非常自然的方式自动得到处理.首先,语法本身是以递归方式表达的:LineSequence可以包含IfStatement,而IfStatement又包含LineSequence.其次,这使得各个语法处理方法以递归方式相互调用.因此,语法元素的嵌套被转换为递归的方法调用.