几种xml读取方法比较

这几天手上有个活,解析xml,众所周知xml的解析方法有:

  1. DOM
  2. SAX
  3. linq to xml
  4. plinq

测试用xml和生成代码

/// <summary>
/// Generates the benchmark input with the article's defaults:
/// 5,000,000 <c>Person</c> records written to <c>VeryHugeXmlFile.xml</c>.
/// </summary>
static void CreateFile()
{
    CreateFile("VeryHugeXmlFile.xml", 5000000);
}

/// <summary>
/// Writes an indented UTF-8 XML document whose <c>Root</c> element contains
/// <paramref name="personCount"/> <c>Person</c> records. Each record has a sequential
/// <c>Id</c> (starting at 1), a random non-negative integer <c>Name</c>, a random
/// <c>Sex</c> ("男" or "女") and a random <c>Age</c> between 1 and 100 inclusive.
/// </summary>
/// <param name="fileName">Path of the XML file to write.</param>
/// <param name="personCount">Number of <c>Person</c> records to generate.</param>
static void CreateFile(string fileName, int personCount)
{
    Random rand = new Random();
    using (var writer = new XmlTextWriter(fileName, Encoding.UTF8))
    {
        // Indented output puts every element on its own line, as in the sample listing.
        writer.Formatting = Formatting.Indented;

        writer.WriteStartDocument();
        writer.WriteStartElement("Root");
        for (int id = 1; id <= personCount; id++)
        {
            writer.WriteStartElement("Person");
            writer.WriteElementString("Id", id.ToString());
            writer.WriteElementString("Name", rand.Next().ToString());
            // Next(0, 2) yields 0 or 1; the upper bound is exclusive.
            writer.WriteElementString("Sex", rand.Next(0, 2) == 0 ? "男" : "女");
            // Next(1, 101) yields 1..100.
            writer.WriteElementString("Age", rand.Next(1, 101).ToString());
            writer.WriteEndElement();
        }
        writer.WriteEndElement();
        writer.WriteEndDocument();
    }
}

之后会生成类似于下面的xml文件

<?xml version="1.0" encoding="utf-8"?>
<Root>
  <Person>
    <Id>1</Id>
    <Name>897639886</Name>
    <Sex>女</Sex>
    <Age>80</Age>
  </Person>
  <Person>
    <Id>2</Id>
    <Name>2012162696</Name>
    <Sex>女</Sex>
    <Age>60</Age>
  </Person>
  <Person>
    ...

xml下载链接

测试代码

统计时间(只是粗略统计了一下运行时间)

/// <summary>
/// Runs <paramref name="way"/> against <paramref name="file"/> and prints the elapsed
/// wall-clock time in milliseconds to the console. This is a rough, single-run
/// measurement, not a rigorous benchmark.
/// </summary>
/// <param name="way">The XML reading routine to time.</param>
/// <param name="file">Path of the XML file passed to <paramref name="way"/>.</param>
static void Watch(Action<string> way, string file)
{
    Stopwatch watch = Stopwatch.StartNew();
    way(file);
    watch.Stop();

    Console.WriteLine(watch.ElapsedMilliseconds);
}

DOM

/// <summary>
/// DOM approach: loads the whole document into an <see cref="XmlDocument"/> and prints
/// the number of <c>Person</c> nodes selected by an XPath query.
/// </summary>
/// <param name="file">Path of the XML file to load.</param>
static void DomWay(string file)
{
    // XPath form of the article's first benchmark filter
    // (Id > 5000 && sex == "男" && age > 15 && age < 50); substitute your own query here.
    const string xpath = "/Root/Person[Id > 5000 and Sex = '男' and Age > 15 and Age < 50]";

    XmlDocument doc = new XmlDocument();
    doc.Load(file);

    Console.WriteLine(doc.SelectNodes(xpath).Count);
}

SAX

/// <summary>
/// Streaming approach: walks the file with a forward-only <see cref="XmlTextReader"/>,
/// reads the <c>Id</c>, <c>Name</c>, <c>Sex</c> and <c>Age</c> children of every
/// <c>Person</c> element, and prints how many records pass the filter.
/// </summary>
/// <remarks>
/// Child elements are located with <c>MoveToContent</c> rather than a fixed number of
/// <c>Read</c> calls, so the result does not depend on whether the file is indented
/// (i.e. on whitespace nodes sitting between elements).
/// </remarks>
/// <param name="file">Path of the XML file to scan.</param>
static void SaxWay(string file)
{
    using (XmlTextReader reader = new XmlTextReader(file))
    {
        int count = 0;
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Element || reader.Name != "Person")
            {
                continue;
            }

            if (reader.IsEmptyElement)
            {
                // <Person /> has no children to inspect.
                continue;
            }

            int? id = null;
            int? name = null;
            string sex = null;
            int? age = null;

            // Step inside <Person>, then consume child elements until </Person>.
            reader.Read();
            while (reader.MoveToContent() == XmlNodeType.Element)
            {
                // Each ReadElementContentAs* call leaves the reader on the node that
                // follows the element's end tag, so the loop always makes progress.
                switch (reader.Name)
                {
                    case "Id":
                        id = reader.ReadElementContentAsInt();
                        break;
                    case "Name":
                        name = reader.ReadElementContentAsInt();
                        break;
                    case "Sex":
                        sex = reader.ReadElementContentAsString();
                        break;
                    case "Age":
                        age = reader.ReadElementContentAsInt();
                        break;
                    default:
                        // Unknown child: skip its whole subtree.
                        reader.Skip();
                        break;
                }
            }

            // Only complete records are considered, as in the original listing.
            if (id != null && name != null && sex != null && age != null)
            {
                // The article's first benchmark filter; substitute your own condition here.
                if (id > 5000 && sex == "男" && age > 15 && age < 50)
                {
                    count++;
                }
            }
        }

        Console.WriteLine(count);
    }
}

Linq to Xml

/// <summary>
/// LINQ to XML approach: loads the document into an <see cref="XElement"/> tree and
/// prints how many <c>Person</c> elements pass the filter.
/// </summary>
/// <param name="file">Path of the XML file to load.</param>
static void LinqWay(string file)
{
    var root = XElement.Load(file);
    var person = from p in root.Elements("Person")
                 let id = (int)p.Element("Id")
                 let sex = (string)p.Element("Sex")
                 let age = (int)p.Element("Age")
                 // The article's first benchmark filter; substitute your own condition here.
                 where id > 5000 && sex == "男" && age > 15 && age < 50
                 select id;
    Console.WriteLine(person.Count());
}

PLinq to Xml

/// <summary>
/// PLINQ approach: loads the document into an <see cref="XElement"/> tree, evaluates the
/// filter over the <c>Person</c> elements in parallel, and prints the number of matches.
/// </summary>
/// <param name="file">Path of the XML file to load.</param>
static void PLinqWay(string file)
{
    var root = XElement.Load(file);
    var person = from p in root.Elements("Person").AsParallel()
                 let id = (int)p.Element("Id")
                 let sex = (string)p.Element("Sex")
                 let age = (int)p.Element("Age")
                 // The article's first benchmark filter; substitute your own condition here.
                 where id > 5000 && sex == "男" && age > 15 && age < 50
                 select id;
    Console.WriteLine(person.Count());
}

统计结果

在6核8G内存机器上,测试程序设置为x64和release模式,在xml查询结果相同的情况下取运行时间(ms),没有详细采集cpu和内存数据

两个模式,区别是加了一个素数的判断。

模式一(过滤条件):Id > 5000 && sex == "男" && age > 15 && age < 50

模式二(过滤条件):Id > 5000 && sex == "男" && age > 15 && age < 50 && IsPrimeInt(name)

方法     模式一(ms)   模式二(ms)
sax      13857        40010
linq     27336        53760
plinq    24550        28846
dom      31737        未采集

由于dom方式依赖xpath查询,而xpath本身不支持嵌入自定义函数(如素数判断),所以第二个测试没有采集dom的结果。

小结

sax:速度优先,内存占用少,但是代码复杂度高。

linq:速度较sax慢,但是代码优雅,维护容易

plinq:同上,在非计算密集型模式中,不比linq和sax模式好多少。但是在计算密集下,后来居上

dom:速度落后,但是原生支持xpath,代码最优雅。

内存方面仅是肉眼观察了任务管理器,sax基本内存曲线为水平线,而linq&plinq在load的时候分配内存,可能其内部也是用了dom。

仓促行文,其中必有不实之处,望各位劳神指教。

相关文章

引言 NOKIA 有句著名的广告语:“科技以人为本”。任何技术都是为了满足人的生产生活需要而产生的。具体...
Writer:BYSocket(泥沙砖瓦浆木匠) 微博:BYSocket 豆瓣:BYSocket Reprint it anywhere u want. 文章...
Writer:BYSocket(泥沙砖瓦浆木匠) 微博:BYSocket 豆瓣:BYSocket Reprint it anywhere u want. 文章...
http://blog.jobbole.com/79252/ 引言 NOKIA 有句著名的广告语:“科技以人为本”。任何技术都是为了满...
(点击上方公众号,可快速关注) 公众号:smart_android 作者:耿广龙|loonggg 点击“阅读原文”,可查看...
一、xml与xslt 相信所有人对xml都不陌生,其被广泛的应用于数据数据传输、保存与序列化中,是一种极为强...