PS:我只用到了这一句
webClient.getOptions().setThrowExceptionOnScriptError(false);
htmlunit jar项目路径http://sourceforge.net/projects/htmlunit/files/htmlunit/
demo代码如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
public class AutoLogin { /** 登录页面 */ private static final String LOGIN_URL = "http://website/login.aspx"; /** 任务列表页面 */ private static final String TASK_LIST_URL = "http://website/Banli.aspx"; /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { testHomePage(); } public static void testHomePage() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8); webClient.getOptions().setThrowExceptionOnScriptError(false); //此行必须要加 webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setJavaScriptEnabled(true); // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setTimeout(300000); // 获取首页 HtmlPage page = (HtmlPage) webClient.getPage(LOGIN_URL); // 根据form的名字获取页面表单,也可以通过索引来获取:page.getForms().get(0) final HtmlForm form = page.getFormByName("form1"); // 用户名/密码 HtmlTextInput textUserName = form.getInputByName("txtUserName"); textUserName.setText("username"); HtmlPasswordInput txtPwd = form.getInputByName("txtPwd"); txtPwd.setText("pass"); //调用JS触发登录按钮 Page page1 = page.executeJavaScript("$('#btn').click()").getNewPage(); page1 = webClient.getPage(TASK_LIST_URL); System.out.println("*************************************************************************************"); System.out.println(page1.getWebResponse().getContentAsString()); System.out.println("*************************************************************************************"); System.out.println(""); System.out.println("Cookies : " + webClient.getCookieManager().getCookies().toString()); } } |
搞不清ASP.NET内部什么逻辑,试了很多方法都不行,查看了无所网站,无意中看到一个这个配置http://stackoverflow.com/questions/20352284/scraping-aspx-page-using-htmlunit
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import java.net.MalformedURLException; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlElement; import com.gargoylesoftware.htmlunit.html.HtmlPage; public class teste { public static void main(String args[]) throws FailingHttpStatusCodeException, MalformedURLException, IOException { HtmlPage page = null; String url = "http://www.bmfbovespa.com.br/cias-listadas/empresas-listadas/BuscaEmpresaListada.aspx?Idioma=pt-br"; WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setTimeout(30000); page = webClient.getPage( url ); System.out.println("Current page: Empresas Listadas | BM&FBOVESPA"); HtmlElement theElement1 = (HtmlElement) page.getElementById("ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas"); page = theElement1.click(); System.out.println(page.asText()); System.out.println("Test has completed successfully"); } } |
最后测试下来,如果不加 webClient.getOptions().setThrowExceptionOnScriptError(false);就一直报这个错误
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
Exception in thread "main" ======= EXCEPTION START ======== Exception class=[java.lang.RuntimeException] com.gargoylesoftware.htmlunit.ScriptException: Exception invoking click at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) Caused by: java.lang.RuntimeException: Exception invoking click at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 9 more Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) ... 19 more Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 65 more Enclosed exception: java.lang.RuntimeException: Exception invoking click at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) ... 19 more Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 65 more ======= EXCEPTION END ======== |
希望能帮助到你,晚安!