xml格式文件(大概有50G)转换为json格式 上传到mongodb数据库中

前端之家收集整理的这篇文章主要介绍了如何把xml格式文件(大概有50G)转换为json格式并上传到mongodb数据库中,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
  1. 数据量偏小的情况就不多做描述了,网上有很多资料。这里要处理的是大约50G的xml格式地理数据:先把它转换为json格式,再上传到mongodb数据库中。如果有更好的建议,欢迎指正。
  1. 解析xml数据
  1. import java.util.ArrayList;
  2. import java.util.List;
  3.  
  4. import org.xml.sax.Attributes;
  5. import org.xml.sax.ContentHandler;
  6. import org.xml.sax.Locator;
  7. import org.xml.sax.SAXException;
  8.  
  9. import com.mongodb.DBObject;
  10. /*
  11. * @author
  12. * @time 2015-11-8
  13. * 主要是是implements ContentHandler,主要实现接口ContentHandler中的startDocument()、endDocument()、startElement()、endElement()
  14. * 另外自定义方法writeToMongoDB()、storeDBMongo()
  15. *
  16. */
  17.  
  18. public class MyContentHandler implements ContentHandler {
  19. private StringBuffer buf;
  20. private String ctitle;
  21. private String cns;
  22. private String cid;
  23. private String ctext;
  24. private String ctimestamp;
  25. private int idnumber=0;
  26. List<Data> listdata=new ArrayList<Data>();
  27. List list=new ArrayList();
  28. @Override
  29. public void setDocumentLocator(Locator locator) {
  30. // TODO Auto-generated method stub
  31.  
  32. }
  33.  
  34. @Override
  35. public void startDocument() throws SAXException {
  36. // TODO Auto-generated method stub
  37. buf=new StringBuffer();
  38. System.out.println("*******解析开始*******");
  39. }
  40.  
  41. @Override
  42. public void endDocument() throws SAXException {
  43. // TODO Auto-generated method stub
  44. try {
  45. writeToMongoDB();
  46. } catch (Exception e) {
  47. // TODO Auto-generated catch block
  48. e.printStackTrace();
  49. }
  50. System.out.println("*******解析结束*******");
  51. }
  52. //把数据导入MongoDB数据库
  53. private void writeToMongoDB() throws Exception {
  54. // TODO Auto-generated method stub
  55. List<DBObject> dblist=new ArrayList<DBObject>();
  56. for(Data d:listdata){
  57. dblist.add(BSONT.mapToBSON(d.toJSONMap()));
  58. }
  59. MongoDBT.writeListToMongo("IP",27017,"databaseName","collectionName",dblist);
  60. }
  61.  
  62. @Override
  63. public void startPrefixMapping(String prefix,String uri)
  64. throws SAXException {
  65. // TODO Auto-generated method stub
  66. }
  67.  
  68. @Override
  69. public void endPrefixMapping(String prefix) throws SAXException {
  70. // TODO Auto-generated method stub
  71. }
  72.  
  73. @Override
  74. public void startElement(String uri,String localName,String qName,Attributes attributes) throws SAXException {
  75. // TODO Auto-generated method stub
  76. if(qName=="page"){
  77. idnumber=1;
  78. }
  79. if(qName=="title"){
  80. ctitle=qName;
  81. }else if(qName=="ns"){
  82. cns=qName;
  83. }else if(qName=="id"&&idnumber==1){
  84. cid=qName;
  85. idnumber=0;
  86. }else if(qName=="timestamp"){
  87. ctimestamp=qName;
  88. }else if(qName=="text"){
  89. ctext=qName;
  90. }
  91. }
  92. @Override
  93. public void endElement(String uri,String qName)
  94. throws SAXException {
  95. // TODO Auto-generated method stub
  96. if(ctitle==qName){
  97. String sss=buf.toString();
  98. ctitle="";
  99. list.add(sss);
  100. buf.setLength(0);
  101. }else if(cns==qName){
  102. cns="";
  103. String sss=buf.toString();
  104. list.add(sss);
  105. buf.setLength(0);
  106. }else if(cid==qName){
  107. cid="";
  108. String sss=buf.toString();
  109. list.add(sss);
  110. buf.setLength(0);
  111. }else if(ctimestamp==qName){
  112. ctimestamp="";
  113. String sss=buf.toString();
  114. list.add(sss);
  115. buf.setLength(0);
  116. }else if(ctext==qName){
  117. ctext="";
  118. String sss=buf.toString();
  119. list.add(sss);
  120. buf.setLength(0);
  121. //有些sss中虽然有重定向标记,但没有“[[”和“]]”,那么就会出现String的index不在范围内的问题
  122. if((sss.toUpperCase().contains("#REDIRECT")||sss.contains("#重定向"))&&sss.contains("[[")&&sss.contains("]]")){
  123. int i=sss.indexOf("[[");
  124. int j=sss.indexOf("]]");
  125. String s=sss.substring(i+2,j);
  126. list.add(s);
  127. list.add("redirect");
  128. }else{
  129. list.add("");
  130. list.add("article");
  131. }
  132. }
  133. if(qName=="page"){
  134. storeDBMongo(list);
  135. }
  136. }
  137.  
  138. private void storeDBMongo(List lt) {
  139. // TODO Auto-generated method stub
  140. for(int i=0;i<list.size();i++){
  141. System.out.println(lt.get(i));
  142. }
  143. try {
  144. Data data=new Data();
  145. data.setTitle(list.get(0).toString());
  146. data.setNamespace(list.get(1).toString());
  147. data.setId(list.get(2).toString());
  148. data.setLastEsited(list.get(3).toString());
  149. data.setMarkup(list.get(4).toString());
  150. data.setTarget(list.get(5).toString());
  151. data.setType(list.get(6).toString());
  152. listdata.add(data);
  153. if(listdata.size()>=300){
  154. writeToMongoDB();
  155. listdata.clear();
  156. }
  157. list.clear();
  158. } catch (Exception e) {
  159. // TODO Auto-generated catch block
  160. e.printStackTrace();
  161. }
  162. }
  163.  
  164. @Override
  165. public void characters(char[] ch,int start,int length)
  166. throws SAXException {
  167. // TODO Auto-generated method stub
  168. if(ctitle=="title"){
  169. buf.append(new String(ch,start,length));
  170. }else if(cns=="ns"){
  171. buf.append(new String(ch,length));
  172. }else if(cid=="id"){
  173. buf.append(new String(ch,length));
  174. list.add(new String(ch,length));
  175. }else if(ctimestamp=="timestamp"){
  176. buf.append(new String(ch,length));
  177. }else if(ctext=="text"){
  178. buf.append(new String(ch,length));
  179. }
  180. }
  181.  
  182. @Override
  183. public void ignorableWhitespace(char[] ch,int length)
  184. throws SAXException {
  185. // TODO Auto-generated method stub
  186.  
  187. }
  188.  
  189. @Override
  190. public void processingInstruction(String target,String data)
  191. throws SAXException {
  192. // TODO Auto-generated method stub
  193.  
  194. }
  195.  
  196. @Override
  197. public void skippedEntity(String name) throws SAXException {
  198. // TODO Auto-generated method stub
  199.  
  200. }
  201. }

自定义类Data、JSONT

  1. import java.util.HashMap;
  2. import java.util.Map;
  3.  
  4.  
  /**
   * Plain bean holding the seven string fields of one record, with a helper
   * that exposes them as a map.
   */
  public class Data {
      private String id;
      private String namespace;
      private String type;
      private String title;
      private String markup;
      private String lastEsited;
      private String target;

      /** @return the record id */
      public String getId() {
          return this.id;
      }

      /** @param id the record id */
      public void setId(String id) {
          this.id = id;
      }

      /** @return the namespace value */
      public String getNamespace() {
          return this.namespace;
      }

      /** @param namespace the namespace value */
      public void setNamespace(String namespace) {
          this.namespace = namespace;
      }

      /** @return the record type */
      public String getType() {
          return this.type;
      }

      /** @param type the record type */
      public void setType(String type) {
          this.type = type;
      }

      /** @return the title */
      public String getTitle() {
          return this.title;
      }

      /** @param title the title */
      public void setTitle(String title) {
          this.title = title;
      }

      /** @return the markup text */
      public String getMarkup() {
          return this.markup;
      }

      /** @param markup the markup text */
      public void setMarkup(String markup) {
          this.markup = markup;
      }

      /** @return the value stored under the (historically misspelled) name lastEsited */
      public String getLastEsited() {
          return this.lastEsited;
      }

      /** @param lastEsited the value stored under the (historically misspelled) name lastEsited */
      public void setLastEsited(String lastEsited) {
          this.lastEsited = lastEsited;
      }

      /** @return the target value */
      public String getTarget() {
          return this.target;
      }

      /** @param target the target value */
      public void setTarget(String target) {
          this.target = target;
      }

      /**
       * Copies all seven fields into a new map keyed by field name. Fields that
       * were never set appear with a {@code null} value.
       *
       * @return a fresh, mutable map; changes to it do not affect this object
       */
      public Map<String,Object> toJSONMap(){
          final Map<String,Object> fields = new HashMap<String,Object>();
          fields.put("id", id);
          fields.put("namespace", namespace);
          fields.put("type", type);
          fields.put("title", title);
          fields.put("markup", markup);
          fields.put("lastEsited", lastEsited);
          fields.put("target", target);
          return fields;
      }
  }
  67.  
  68.  
  69. /*
  70. * NextMap-Crawler Module
  71. *
  72. * Copyright (C) 2002-2014,Institute of Geographic Sciences and Natural Resources Research,* Chinese Academy of Sciences
  73. *
  74. * This library is free software; you can redistribute it and/or
  75. * modify it under the terms of the GNU Lesser General Public
  76. * License as published by the Free Software Foundation;
  77. * version 2.1 of the License.
  78. *
  79. * This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of
  80. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  81. * Lesser General Public License for more details.
  82. */
  83.  
  84.  
  85. import java.io.IOException;
  86. import java.io.StringWriter;
  87. import java.util.List;
  88. import java.util.Map;
  89.  
  90. import com.fasterxml.jackson.databind.ObjectMapper;
  91.  
  92. /**
  93. *
  94. * @author zhuhaichuan
  95. * @date 2015-11-8
  96. *
  97. *
  98. */
  99.  
  100. public class JSONT {
  101. public static String mapToJSONString(Map map) {
  102. StringWriter sw = new StringWriter();
  103. try {
  104. ObjectMapper mapper = new ObjectMapper();
  105. mapper.writeValue(sw,map);
  106. } catch (IOException e) {
  107. // TODO Auto-generated catch block
  108. e.printStackTrace();
  109. }
  110. return sw.toString();
  111. }
  112.  
  113. /**
  114. *
  115. * @param list
  116. * @return
  117. */
  118. public static String listToJSONString(List list) {
  119. StringWriter sw = new StringWriter();
  120. try {
  121. ObjectMapper mapper = new ObjectMapper();
  122. mapper.writeValue(sw,list);
  123. } catch (IOException e) {
  124. // TODO Auto-generated catch block
  125. e.printStackTrace();
  126. }
  127. return sw.toString();
  128. }
  129.  
  130. /**
  131. *
  132. * @param list
  133. * @return
  134. */
  135. public static String beanToJSONString(Object bean) {
  136. StringWriter sw = new StringWriter();
  137. try {
  138. ObjectMapper mapper = new ObjectMapper();
  139. mapper.writeValue(sw,bean);
  140. } catch (IOException e) {
  141. // TODO Auto-generated catch block
  142. e.printStackTrace();
  143. }
  144. return sw.toString();
  145. }
  146.  
  147. /**
  148. *
  149. * @param jsonstr
  150. * @return
  151. */
  152. public static Map jsonToMap(String jsonstr) {
  153. Map map = null;
  154. try {
  155. ObjectMapper mapper = new ObjectMapper();
  156. map = mapper.readValue(jsonstr,Map.class);
  157. } catch (IOException e) {
  158. // TODO Auto-generated catch block
  159. e.printStackTrace();
  160. }
  161. return map;
  162. }
  163.  
  164. /**
  165. *
  166. * @param jsonstr
  167. * @return
  168. */
  169. public static List jsonToList(String jsonstr) {
  170. List list = null;
  171. try {
  172. ObjectMapper mapper = new ObjectMapper();
  173. list = mapper.readValue(jsonstr,List.class);
  174. } catch (IOException e) {
  175. // TODO Auto-generated catch block
  176. e.printStackTrace();
  177. }
  178. return list;
  179. }
  180. }
还有就是类MyErrorHandler
  1. import org.xml.sax.ErrorHandler;
  2. import org.xml.sax.SAXException;
  3. import org.xml.sax.SAXParseException;
  4.  
  5.  
  /**
   * SAX {@link ErrorHandler} that prints every reported problem to standard
   * output as a five-line block (banner, line number, column number, message,
   * closing rule) and then returns without throwing.
   */
  public class MyErrorHandler implements ErrorHandler {

      /** Prints a parser warning. */
      @Override
      public void warning(SAXParseException exception) throws SAXException {
          report("*******WARNING******", "exception信息:", "********************", exception);
      }

      /** Prints a recoverable parser error. */
      @Override
      public void error(SAXParseException exception) throws SAXException {
          report("******* ERROR ******", "exception信息:", "********************", exception);
      }

      /** Prints a fatal parser error. */
      @Override
      public void fatalError(SAXParseException exception) throws SAXException {
          // The message label of this block has no trailing colon, unlike the other two.
          report("******** FATAL ERROR ********", "exception信息", "*****************************", exception);
      }

      /**
       * Writes one report block to standard output.
       *
       * @param banner       first line of the block
       * @param messageLabel text printed directly in front of the exception message
       * @param closing      last line of the block
       * @param problem      the exception supplying line, column and message
       */
      private static void report(String banner, String messageLabel, String closing,
              SAXParseException problem) {
          System.out.println(banner);
          System.out.println("行号:" + problem.getLineNumber());
          System.out.println("列号:" + problem.getColumnNumber());
          System.out.println(messageLabel + problem.getMessage());
          System.out.println(closing);
      }

  }

自定义MongoDBT类
  1. import java.util.ArrayList;
  2. import java.util.List;
  3.  
  4. import com.mongodb.DB;
  5. import com.mongodb.DBCollection;
  6. import com.mongodb.DBObject;
  7. import com.mongodb.Mongo;
  8.  
  9.  
  10. public class MongoDBT {
  11. public static void writeListToMongo(String ip,int port,String dbname,String collname,List<DBObject> list) throws Exception{
  12. Mongo mongo=new Mongo(ip,port);
  13. DB db=mongo.getDB(dbname);
  14. DBCollection collection=db.getCollection(collname);
  15. List<DBObject> dblist=new ArrayList<DBObject>();
  16. for(int i=0;i<list.size();i++){
  17. dblist.add(list.get(i));
  18. }
  19. collection.insert(dblist);
  20. mongo.close();
  21. }
  22. }

猜你在找的XML相关文章