1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
/** * 输出文字 * @param response * @param s */ public static void responseOut(HttpServletResponse response,String s){ response.setContentType("text/html;charset=UTF-8"); response.setCharacterEncoding("UTF-8"); try ( PrintWriter pw = response.getWriter() ){ pw.write(s); } catch (IOException e) { e.printStackTrace(); } } |
from:https://www.cnblogs.com/yanqin/p/7463294.html
View DetailsPS:我只用到了这一句 webClient.getOptions().setThrowExceptionOnScriptError(false); htmlunit jar项目路径http://sourceforge.net/projects/htmlunit/files/htmlunit/ demo代码如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
public class AutoLogin { /** 登录页面 */ private static final String LOGIN_URL = "http://website/login.aspx"; /** 任务列表页面 */ private static final String TASK_LIST_URL = "http://website/Banli.aspx"; /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { testHomePage(); } public static void testHomePage() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8); webClient.getOptions().setThrowExceptionOnScriptError(false); //此行必须要加 webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setJavaScriptEnabled(true); // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setTimeout(300000); // 获取首页 HtmlPage page = (HtmlPage) webClient.getPage(LOGIN_URL); // 根据form的名字获取页面表单,也可以通过索引来获取:page.getForms().get(0) final HtmlForm form = page.getFormByName("form1"); // 用户名/密码 HtmlTextInput textUserName = form.getInputByName("txtUserName"); textUserName.setText("username"); HtmlPasswordInput txtPwd = form.getInputByName("txtPwd"); txtPwd.setText("pass"); //调用JS触发登录按钮 Page page1 = page.executeJavaScript("$('#btn').click()").getNewPage(); page1 = webClient.getPage(TASK_LIST_URL); System.out.println("*************************************************************************************"); System.out.println(page1.getWebResponse().getContentAsString()); System.out.println("*************************************************************************************"); System.out.println(""); System.out.println("Cookies : " + webClient.getCookieManager().getCookies().toString()); } } |
搞不清ASP.NET内部什么逻辑,试了很多方法都不行,查看了无所网站,无意中看到一个这个配置http://stackoverflow.com/questions/20352284/scraping-aspx-page-using-htmlunit
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import java.net.MalformedURLException; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlElement; import com.gargoylesoftware.htmlunit.html.HtmlPage; public class teste { public static void main(String args[]) throws FailingHttpStatusCodeException, MalformedURLException, IOException { HtmlPage page = null; String url = "http://www.bmfbovespa.com.br/cias-listadas/empresas-listadas/BuscaEmpresaListada.aspx?Idioma=pt-br"; WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setTimeout(30000); page = webClient.getPage( url ); System.out.println("Current page: Empresas Listadas | BM&FBOVESPA"); HtmlElement theElement1 = (HtmlElement) page.getElementById("ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas"); page = theElement1.click(); System.out.println(page.asText()); System.out.println("Test has completed successfully"); } } |
最后测试下来,如果不加 webClient.getOptions().setThrowExceptionOnScriptError(false);就一直报这个错误
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
Exception in thread "main" ======= EXCEPTION START ======== Exception class=[java.lang.RuntimeException] com.gargoylesoftware.htmlunit.ScriptException: Exception invoking click at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) Caused by: java.lang.RuntimeException: Exception invoking click at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 9 more Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) ... 19 more Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 65 more Enclosed exception: java.lang.RuntimeException: Exception invoking click at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) ... 19 more Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) ... 65 more ======= EXCEPTION END ======== |
希望能帮助到你,晚安! from:https://www.cnblogs.com/yimu/p/LOVE_HCJ.html
View DetailsPS:下面这个低本息我测试成功了,高版本怎么试都有问题。 随着Web的发展,RIA越来越多,JavaScript和Complex AJAX Libraries给网络爬虫带来了极大的挑战,解析页面的时候需要模拟浏览器执行JavaScript才能获得需要的文本内容。 好在有一个Java开源项目HtmlUnit,它能模拟Firefox、IE、Chrome等浏览器,不但可以用来测试Web应用,还可以用来解析包含JS的页面以提取信息。 下面看看HtmlUnit的效果如何: 首先,建立一个maven工程,引入junit依赖和HtmlUnit依赖:
1 2 3 4 5 6 7 8 9 10 11 |
<dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.8.2</version> <scope>test</scope> </dependency> <dependency> <groupId>net.sourceforge.htmlunit</groupId> <artifactId>htmlunit</artifactId> <version>2.14</version> </dependency> |
其次,写一个junit单元测试来使用HtmlUnit提取页面信息:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
/** * 使用HtmlUnit模拟浏览器执行JS来获取网页内容 * @author 杨尚川 */ public class HtmlUnitTest { @Test public void homePage() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_11); final HtmlPage page = webClient.getPage("http://yangshangchuan.iteye.com"); Assert.assertEquals("杨尚川的博客 - ITeye技术网站", page.getTitleText()); final String pageAsXml = page.asXml(); Assert.assertTrue(pageAsXml.contains("杨尚川,系统架构设计师,系统分析师,2013年度优秀开源项目APDPlat发起人,资深Nutch搜索引擎专家。多年专业的软件研发经验,从事过管理信息系统(MIS)开发、移动智能终端(Win CE、Android、Java ME)开发、搜索引擎(nutch、lucene、solr、elasticsearch)开发、大数据分析处理(Hadoop、Hbase、Pig、Hive)等工作。目前为独立咨询顾问,专注于大数据、搜索引擎等相关技术,为客户提供Nutch、Lucene、Hadoop、Solr、ElasticSearch、HBase、Pig、Hive、Gora等框架的解决方案、技术支持、技术咨询以及培训等服务。")); final String pageAsText = page.asText(); Assert.assertTrue(pageAsText.contains("[置顶] 国内首套免费的《Nutch相关框架视频教程》(1-20)")); webClient.closeAllWindows(); } @Test public void homePage_Firefox() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); final HtmlPage page = webClient.getPage("http://yangshangchuan.iteye.com"); Assert.assertEquals("杨尚川的博客 - ITeye技术网站", page.getTitleText()); webClient.closeAllWindows(); } @Test public void getElements() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.CHROME); final HtmlPage page = webClient.getPage("http://yangshangchuan.iteye.com"); final HtmlDivision div = page.getHtmlElementById("blog_actions"); //获取子元素 Iterator<DomElement> iter = div.getChildElements().iterator(); while(iter.hasNext()){ System.out.println(iter.next().getTextContent()); } //获取所有输出链接 for(HtmlAnchor anchor : page.getAnchors()){ System.out.println(anchor.getTextContent()+" : "+anchor.getAttribute("href")); } webClient.closeAllWindows(); } @Test public void xpath() throws Exception { final WebClient webClient = new WebClient(); final HtmlPage page = webClient.getPage("http://yangshangchuan.iteye.com"); //获取所有博文标题 final List<HtmlAnchor> titles = (List<HtmlAnchor>)page.getByXPath("/html/body/div[2]/div[2]/div/div[16]/div/h3/a"); for(HtmlAnchor title : titles){ System.out.println(title.getTextContent()+" : "+title.getAttribute("href")); } //获取博主信息 final HtmlDivision div = (HtmlDivision) page.getByXPath("//div[@id='blog_owner_name']").get(0); System.out.println(div.getTextContent()); webClient.closeAllWindows(); } @Test public void submittingForm() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); final HtmlPage page = webClient.getPage("http://www.oschina.net"); // Form没有name和id属性 final HtmlForm form = page.getForms().get(0); final HtmlTextInput textField = form.getInputByName("q"); final HtmlButton button = form.getButtonByName(""); textField.setValueAttribute("APDPlat"); final HtmlPage resultPage = button.click(); final String pageAsText = resultPage.asText(); Assert.assertTrue(pageAsText.contains("找到约")); Assert.assertTrue(pageAsText.contains("条结果")); webClient.closeAllWindows(); } } |
最后,我们运行单元测试, 全部通过测试! from:http://yangshangchuan.iteye.com/blog/2036809
View Details也谈基于 Node.js 的全栈式开发(基于 Node.js 的前后端分离) 前言 为了解决传统 Web 开发模式带来的各种问题,我们进行了许多尝试,但由于前/后端的物理鸿沟,尝试的方案都大同小异。痛定思痛,今天我们重新思考了“前后端”的定义,引入前端同学都熟悉的 Node.js,试图探索一条全新的前后端分离模式。 随着不同终端(Pad/Mobile/PC)的兴起,对开发人员的要求越来越高,纯浏览器端的响应式已经不能满足用户体验的高要求,我们往往需要针对不同的终端开发定制的版本。为了提升开发效率,前后端分离的需求越来越被重视,后端负责业务/数据接口,前端负责展现/交互逻辑,同一份数据接口,我们可以定制开发多个版本。 这个话题最近被讨论得比较多,阿里有些 BU 也在进行一些尝试。讨论了很久之后,我们团队决定探索一套基于 Node.js 的前后端分离方案,过程中有一些不断变化的认识以及思考,记录在这里,也希望看到的同学参与讨论,帮我们完善。 一、什么是前后端分离? 最开始组内讨论的过程中我发现,每个人对前后端分离的理解不一样,为了保证能在同一个频道讨论,先就什么是”前后端分离”达成一致。 大家一致认同的前后端分离的例子就是 SPA(Single Page Application),所有用到的展现数据都是后端通过异步接口(AJAX/JSONP)的方式提供的,前端只管展现。 从某种意义上来说,SPA 确实做到了前后端分离,但这种方式存在两个问题: WEB 服务中,SPA 类占的比例很少。很多场景下还有同步/同步+异步混合的模式,SPA 不能作为一种通用的解决方案。 现阶段的 SPA 开发模式,接口通常是按照展现逻辑来提供的,有时候为了提高效率,后端会帮我们处理一些展现逻辑,这就意味着后端还是涉足了 View 层的工作,不是真正的前后端分离。 SPA 式的前后端分离,是从物理层做区分(认为只要是客户端的就是前端,服务器端的就是后端),这种分法已经无法满足我们前后端分离的需求,我们认为从职责上划分才能满足目前我们的使用场景: 前端:负责 View 和 Controller 层。 后端:只负责 Model 层,业务处理/数据等。 为什么去做这种职责的划分,后面会继续探讨。 二、为什么要前后端分离? 关于这个问题,玉伯的文章《Web 研发模式演变》中解释得非常全面,我们再大概理一下: 2.1 现有开发模式的适用场景 玉伯提到的几种开发模式,各有各的适用场景,没有哪一种完全取代另外一种。 比如后端为主的 MVC,做一些同步展现的业务效率很高,但是遇到同步异步结合的页面,与后端开发沟通起来就会比较麻烦。 AJAX 为主 SPA 型开发模式,比较适合开发 APP 类型的场景,但是只适合做 APP,因为 SEO 等问题不好解决,对于很多类型的系统,这种开发方式也过重。 2.2 前后端职责不清 在业务逻辑复杂的系统里,我们最怕维护前后端混杂在一起的代码,因为没有约束,M-V-C每一层都可能出现别的层的代码,日积月累,完全没有维护性可言。 虽然前后端分离没办法完全解决这种问题,但是可以大大缓解。因为从物理层次上保证了你不可能这么做。 2.3 开发效率问题 淘宝的 Web 基本上都是基于 MVC 框架 webx,架构决定了前端只能依赖后端。 所以我们的开发模式依然是,前端写好静态 demo,后端翻译成 VM 模版,这种模式的问题就不说了,被吐槽了很久。 直接基于后端环境开发也很痛苦,配置安装使用都很麻烦。 为了解决这个问题,我们发明了各种工具,比如 VMarket,但是前端还是要写 VM,而且依赖后端数据,效率依然不高。 另外,后端也没法摆脱对展现的强关注,从而专心于业务逻辑层的开发。 2.4 对前端发挥的局限 性能优化如果只在前端做空间非常有限,于是我们经常需要后端合作才能碰撞出火花,但由于后端框架限制,我们很难使用 Comet、Bigpipe 等技术方案来优化性能。 为了解决以上提到的一些问题,我们进行了很多尝试,开发了各种工具,但始终没有太多起色,主要是因为我们只能在后端给我们划分的那一小块空间去发挥。只有真正做到前后端分离,我们才能彻底解决以上问题。 三、怎么做前后端分离? 怎么做前后端分离,其实第一节中已经有了答案: 前端:负责 […]
View Details1.页面中添加modal <!-- 模态框(Modal) --> <div class="modal fade" id="showModal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true"> <div class="modal-dialog" style="width:900px;"> <div class="modal-content"> //内容动态填充 </div><!-- /.modal-content --> </div><!-- /.modal --> </div> 2.数据格式。页面动态生成a 标签 <a data-toggle=’modal' data-target=’#showModal' onclick=targetto('" + row.id + "')>"+ row.title+"</a>"; 3.单击事件 function targetto(id){ $("#showModal").modal({ remote: "showAction.action?id="+id }); } 4.每次隐藏时,清除数据。确保点击时,重新加载 $("#showModal").on("hidden.bs.modal", function() { $(this).removeData("bs.modal"); }); 5.基于bootstrap3.其他版本,请自己Google,如有雷同,纯属巧合,如有需要,欢迎分享。 from:https://www.cnblogs.com/youmingkuang/p/6214610.html
View Details前言 之前的前5篇作为EF方面的基础篇,后面我们将使用MVC+EF 并且使用IOC ,Repository,UnitOfWork,DbContext来整体来学习。因为后面要用到IOC,所以本篇先单独先学习一下IOC,我们本本文单独主要学习Autofac,其实对于Autofac我也是边学边记录。不对的地方,也希望大家多多指导。 个人在学习过程中参考博客: AutoFac文档:http://www.cnblogs.com/wolegequ/archive/2012/06/09/2543487.html AutoFac使用方法总结:Part I:http://niuyi.github.io/blog/2012/04/06/autofac-by-unit-test/ 为什么使用AutoFac? Autofac是.NET领域最为流行的IOC框架之一,传说是速度最快的一个: 优点: 它是C#语言联系很紧密,也就是说C#里的很多编程方式都可以为Autofac使用,例如可以用Lambda表达式注册组件 较低的学习曲线,学习它非常的简单,只要你理解了IoC和DI的概念以及在何时需要使用它们 XML配置支持 自动装配 与Asp.Net MVC 集成 微软的Orchad开源程序使用的就是Autofac,从该源码可以看出它的方便和强大 上面的优点我也是拷的别人文章里面的,上面的这个几乎所有讲Autofac博文都会出现的。这个也是首次学习,所以我们还是记录的细一点。 怎么使用Autofac 通过VS中的NuGet来加载AutoFac,引入成功后引用就会出现Autofac。 1、我们做一个简单的例子先用一下 就拿数据访问来做案例把,一个数据请求有两个类,一个是Oracle 一个是SQLSERVER。我们在使用的时候可以选择调用那个数据库。 1.1 我们先定义一个数据访问的接口和访问类。
1 2 3 4 5 6 7 8 9 10 11 |
/// <summary> /// 数据源操作接口 /// </summary> public interface IDataSource { /// <summary> /// 获取数据 /// </summary> /// <returns></returns> string GetData(); } |
1 2 3 4 5 6 7 8 9 10 |
/// <summary> /// SQLSERVER数据库 /// </summary> class Sqlserver : IDataSource { public string GetData() { return "通过SQLSERVER获取数据"; } } |
1 2 3 4 5 6 7 8 9 10 |
/// <summary> /// ORACLE数据库 /// </summary> public class Oracle : IDataSource { public string GetData() { return "通过Oracle获取数据"; } } |
最普通的方式大家都会的吧! 如果最普通的方式调用SQLSERVER怎么写?
1 2 3 4 5 6 7 8 |
static void Main(string[] args) { IDataSource ds = new Sqlserver(); Console.WriteLine(ds.GetData()); Console.ReadLine(); } |
1 |
调用Oracle的话new Oracle()就可以了。如果这个都不能理解的话,那学习这个你就很费劲了。 |
改进一下代码。我们在加入一个DataSourceManager类来看一下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
/// <summary> /// 数据源管理类 /// </summary public class DataSourceManager { IDataSource _ds; /// <summary> /// 根据传入的类型动态创建对象 /// </summary> /// <param name="ds"></param> public DataSourceManager(IDataSource ds) { _ds = ds; } public string GetData() { return _ds.GetData(); } } |
这样写的好处是什么,这样加入加入新的数据源,只用调用的时候传入这个对象就可以,就会自动创建一个对应的对象。那接下如果要调用SQLSERVER怎么写。看代码
1 2 3 4 |
DataSourceManager dsm = new DataSourceManager(new Sqlserver()); Console.WriteLine(dsm.GetData()); Console.ReadLine(); |
1.2 注入实现构造函数注入 上面的DataSourceManager的动态创建的方式就是因为又有个带IDataSource的参数的构造函数,只要调用者传入实现该接口的对象,就实现了对象创建。 那我们看看怎么使用AutoFac注入实现构造函数注入
1 2 3 4 5 6 7 8 9 10 11 |
var builder = new ContainerBuilder(); builder.RegisterType<DataSourceManager>(); builder.RegisterType<Sqlserver>().As<IDataSource>(); using (var container = builder.Build()) { var manager = container.Resolve<DataSourceManager>(); Console.WriteLine(manager.GetData()); Console.ReadLine(); } |
上面的就是AutoFac构造函数注入,他给IDataSource注入的是Sqlserver所以我们调用的数据,返回的就是Sqlserver数据。那下面我们具体的了解一下AutoFac的一些方法 1.3 Autofac方法说明
1 |
(1)builder.RegisterType<Object>().As<Iobject>():注册类型及其实例。例如上面就是注册接口IDataSource的实例Sqlserver |
1 |
(2)IContainer.Resolve<IDAL>():解析某个接口的实例。例如一下代码,我可以解析接口返回的就是Sqlserver实例 |
var builder = new ContainerBuilder(); //builder.RegisterType<DataSourceManager>(); builder.RegisterType<Sqlserver>().As<IDataSource>(); using (var container = builder.Build()) { var manager = container.Resolve<IDataSource>(); Console.WriteLine(manager.GetData()); Console.ReadLine(); }
1 |
(3)builder.RegisterType<Object>().Named<Iobject>(string name):为一个接口注册不同的实例。有时候难免会碰到多个类映射同一个接口,比如Sqlerver和Oracle都实现了IDalSource接口,为了准确获取想要的类型,就必须在注册时起名字。 |
1 2 3 4 5 6 7 8 var builder = new ContainerBuilder(); builder.RegisterType<Sqlserver>().Named<IDataSource>("SqlServer"); builder.RegisterType<Oracle>().Named<IDataSource>("Oracel"); using (var container = […]
View Details