我有一个存放Word文档的文件夹,想把这些文档转换为HTML以便进一步处理.我只有Visual Studio 2010 Express版.可以使用Express版完成吗?我找到了一些进行转换的示例,但它们需要Microsoft.Office.Tools.Word库,而Express版并不附带这个库.
编辑:我找到了.它在COM组件列表中的名称实际上是Microsoft Word 12.0 Object Library(对象库),对应的命名空间是Microsoft.Office.Interop.Word.
解决方法
您应该可以使用Express版.我改编了
this question的答案,改编后的代码如下.您需要添加对Microsoft.Office.Interop.Word的引用才能使其正常工作.如果您缺少这个库,请参阅
this article on MSDN.
查看WdSaveFormat枚举可知,您还可以将文档另存为筛选后的HTML格式(wdFormatFilteredHTML).
namespace Sample {
    using Microsoft.Office.Interop.Word;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;

    /// <summary>
    /// Console sample that converts every Word document in a folder to another
    /// format (HTML by default) by automating Word through COM interop.
    /// </summary>
    class Program {
        /// <summary>
        /// Entry point: converts the documents found in C:\Documents to HTML.
        /// </summary>
        public static void Main()
        {
            Convert("C:\\Documents", WdSaveFormat.wdFormatHTML);
        }

        /// <summary>
        /// Opens each file matching "*.doc" directly inside <paramref name="path"/>
        /// with a hidden Word instance and saves a copy next to it, using
        /// <paramref name="format"/> and an ".html" extension.
        /// </summary>
        /// <param name="path">Directory that is searched (non-recursively) for documents.</param>
        /// <param name="format">Word save format passed to <c>Document.SaveAs</c>.</param>
        /// <remarks>
        /// Returns without starting Word when no file matches. Source documents are
        /// closed without saving changes, and Word is always asked to quit, even if
        /// a conversion throws.
        /// </remarks>
        private static void Convert(string path, WdSaveFormat format)
        {
            DirectoryInfo sourceDirectory = new DirectoryInfo(path);

            // NOTE(review): on .NET Framework a three-character pattern such as
            // "*.doc" may also match longer extensions like ".docx" (see the
            // DirectoryInfo.GetFiles remarks) - confirm if that matters here.
            FileInfo[] wordFiles = sourceDirectory.GetFiles("*.doc");
            if (wordFiles.Length == 0) {
                return;
            }

            // "global::" keeps this bound to the BCL namespace. The using directives
            // live inside this namespace, so a type named "System" imported from the
            // interop assembly would otherwise take precedence (presumably
            // Microsoft.Office.Interop.Word.System - verify against the PIA).
            object missing = global::System.Reflection.Missing.Value;

            Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
            try {
                // Keep Word invisible and skip repainting while converting.
                word.Visible = false;
                word.ScreenUpdating = false;

                foreach (FileInfo wordFile in wordFiles) {
                    object sourceFileName = wordFile.FullName;
                    Document doc = word.Documents.Open(ref sourceFileName, ref missing, ref missing);
                    try {
                        doc.Activate();

                        // Swap only the file extension. The previous
                        // FullName.Replace(".doc", ".html") rewrote every ".doc"
                        // in the path: it corrupted directory names containing
                        // ".doc" and turned "a.docx" into "a.htmlx".
                        object outputFileName = global::System.IO.Path.ChangeExtension(wordFile.FullName, ".html");
                        object fileFormat = format;
                        doc.SaveAs(ref outputFileName, ref fileFormat, ref missing);
                    }
                    finally {
                        // The source document itself must stay untouched.
                        object saveChanges = WdSaveOptions.wdDoNotSaveChanges;
                        // Cast to _Document so the Close method is chosen over the
                        // Close event that Document also exposes.
                        ((_Document)doc).Close(ref saveChanges, ref missing);
                    }
                }
            }
            finally {
                // Cast to _Application so the Quit method is chosen over the Quit event.
                ((_Application)word).Quit(ref missing, ref missing);
            }
        }
    }
}