题目
本题来自蓝港在线技术团队的idea,详情如下: XML-可扩展标记语言,用于标记电子文件使其具有结构性的标记语言,可以用来标记数据、定义数据类型,是一种允许用户对自己的标记语言进行定义的源语言,被广泛地运用于数据传输和存储。请编写一段程序,不使用语言之外的开源库,解析对应的XML文件,并格式化后在屏幕上打印出来。 举个例子如下,当给定下述XML文件时: <?xml version="1.0" ?> <Books> <Book> <Name = "The C++ Programming Language" Author="Bjarne Stroustrup" /> </Book> <Book> <Name = "Effective C++" Author = "Scott Meyers" /> </Book> </Books> 它对应的输出应该是: Books Book 1 Name:The C++ Programming Language Author:Bjarne Stroustrup Book 2 Name:Effective C++ Author:Scott Meyers 输入:简化的一段xml文件,用字符串表示,如下(属性名字不包含引号和等号,也不包含大于小于等特殊字符,详细规则见下面的答题说明) string in = "<?xml version=\"1.0\" ?><Books><Book><Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\" /></Book><Book><Name = \"Effective C++\" Author = \"Scott Meyers\" /></Book></Books>"; 输出:对输入的xml字符串解析,得到输出如下: string out = "Books\r\n\tBook 1\r\n\t\tName:The C++ Programming Language\r\n\t\tAuthor:Bjarne Stroustrup\r\n\tBook 2\r\n\t\tName:Effective C++\r\n\t\tAuthor:Scott Meyers"; 函数原型: C++ ParsingXML(string in); Java ParsingXML(String in); C# ParsingXML(string input)
挑战规则:
1、xml分为三级,一级为大类,二级为实体,三级为属性 2、三级之间用换行间隔,每级开头用缩进。 3、二级对象后间隔一个空格,并输出对象所在xml中的顺序号 4、第三级顺序输出属性名称和属性值,属性名和属性值间用:间隔 5、输入格式: 输入为单一xml格式字符串,其中带有"字符。可能带有回车换行符。 6、输出格式: 输出数据中不带"字符,输出格式中的每行开头使用Tab字符作为缩进。 7、属性名字不包含引号和等号,不包含大于小于等特殊字符。 8、不能使用语言之外的开源库。
代码
#include <iostream> #include <stdio.h> #include <string.h> #include <string> #include <vector> #include <stack> using namespace std; enum FSM{FSM_NULL,FSM_START,FSM_LevelOne,FSM_LevelTwo}; void dealAbstract(string &out,string &in,const string &letter) { int flag = 0; string::iterator iter = in.begin(); while(iter < in.end()) { if(*iter == ' ') { ++iter; continue; } switch(flag) { case 0: out += letter; while(iter < in.end() && *iter != '=') { if(*iter != ' ') { out.push_back(*iter); } ++iter; } out.push_back(':'); flag = 1; break; case 1: while(*iter++ != '"'); while(*iter != '"') { out.push_back(*iter); ++iter; } ++iter; flag = 0; break; } } } string ParsingXML(string in) { string out; string tmp; stack<string> mark; stack<enum FSM> markLevel; char index; string letter; FSM fsm_flag = FSM_NULL; markLevel.push(fsm_flag); for(string::iterator iter = in.begin(); iter < in.end(); ++iter) { tmp.clear(); while(*iter++ != '<');//取得标签数据 for(; *iter != '>'; ++iter) { tmp.push_back(*iter); } fsm_flag = markLevel.top();//得到当前标签级别 if(tmp[0] == '/')//结束标签标志 { markLevel.pop(); } else if(tmp[tmp.length() - 1] == '/' || tmp[tmp.length() - 1] == '"')//标签属性 { if(tmp[tmp.length() - 1] != '"') { tmp = tmp.substr(0,tmp.length() - 1); } dealAbstract(out,tmp,letter); } else//开始标签 { switch(fsm_flag)//标签转换状态机 { case FSM_NULL: if(tmp[0] == '?' 
&& tmp[tmp.length() - 1] == '?') { fsm_flag = FSM_START; markLevel.push(fsm_flag); } break; case FSM_START://一级标签 fsm_flag = FSM_LevelOne; markLevel.push(fsm_flag); out += "\r\n" + tmp; letter = "\r\n\t"; index = '0'; break; case FSM_LevelOne://二级标签 fsm_flag = FSM_LevelTwo; letter = "\r\n\t"; markLevel.push(fsm_flag); ++index; out += letter + tmp + ' ' + index; letter = "\r\n\t\t"; break; } } } out = out.substr(2,out.length());//去除开始处的两个回车换行 return out; } int main() { // string in = "<?xml version=\"1.0\" ?><Books><Class = \"art\"/><Book><Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\" /></Book><Book><Name = \"Effective C++\" Author = \"Scott Meyers\" /></Book></Books><VideoS><video><Name = \"1123213\"/></video><video><Name = \"23456\"/></video></VideoS>"; string in = "<?xml version=\"1.0\" ?><Books><Book><Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\"></Book><Book><Name = \"Effective C++\" Author = \"Scott Meyers\"></Book></Books>"; string out; out = ParsingXML(in); cout << out << endl; return 0; }