开始准备Chromechromedriver驱动(下载不同版本浏览器对应驱动教程)代码pom依赖:<dependencies> <!--添加浏览器驱动--> <dependency> <groupId>org.seleniumhq.selenium</groupId> <artifactId>selenium-server</artifactId> <version>3.141.59</version> </dependency> <dependency> <groupId>cn.hutool</groupId> <artifactId>hutool-all</artifactId> <version>4.5.6</version> </dependency></dependencies>Main代码:import cn.hutool.core.date.DateUtil;import cn.hutool.core.lang.Console;import cn.hutool.core.util.StrUtil;import org.openqa.selenium.By;import org.openqa.selenium.WebDriver;import org.openqa.selenium.WebElement;import org.openqa.selenium.chrome.ChromeDriverService;import org.openqa.selenium.remote.DesiredCapabilities;import org.openqa.selenium.remote.RemoteWebDriver;import java.io.File;import java.io.IOException;import java.util.ArrayList;public class LagouSpider { private static ArrayList<String> strings = new ArrayList<>(); public static void main(String[] args) { String webDriverPath = LagouSpider.class.getResource("chromedriver.exe").getPath(); // 这里需要注意一定要和打开的Chrome版本匹配 System.setProperty("webdriver.chrome.driver", webDriverPath); // 构建驱动 ChromeDriverService service = new ChromeDriverService.Builder(). usingDriverExecutable(new File(webDriverPath)).usingAnyFreePort().build(); try { service.start(); } catch (IOException e) { e.printStackTrace(); } // 获取Web驱动 WebDriver driver = new RemoteWebDriver(service.getUrl(), DesiredCapabilities.chrome()); String url = "https://tophub.today/"; // 访问页面 driver.get(url); for (int i = 0; i < 200; i++) { for (int j = 0; j <= 10; j++) { try { run(driver, webDriverPath, url, i, j); } catch (Exception e) { continue; } } } Console.log(strings); // 退出驱动线程 driver.quit(); // 关闭service服务 service.stop(); } public static void run(WebDriver driver, String webDriverPath, String url, int i, int j) { String titleExpression = "//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-ih')]/div[contains(@class, 'cc-cd-is')]/a/div[contains(@class, 'cc-cd-lb')]"; String contentExpression = "//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-cb nano has-scrollbar')]/div[contains(@class, 'cc-cd-cb-l nano-content')]/a[" + j + "]/div[contains(@class, 'cc-cd-cb-ll')]/span[contains(@class, 't')]"; // 获取标题 WebElement titleElement = driver.findElement(By.xpath(titleExpression)); String titleElementText = titleElement.getText(); if (StrUtil.isNotEmpty(titleElementText)) { boolean hasStr = -1 == strings.indexOf(titleElementText); if (hasStr) { strings.add(titleElementText); Console.log(StrUtil.format("[{}]", titleElementText)); } WebElement textChildEle = driver.findElement(By.xpath(contentExpression)); if (StrUtil.isNotEmpty(textChildEle.getText())) { Console.log(StrUtil.format("\t[{}]\t[{}]({})", DateUtil.now(), textChildEle.getText()), textChildEle.findElement(By.xpath("//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-cb nano has-scrollbar')]/div[contains(@class, 'cc-cd-cb-l nano-content')]/a[" + j + "]")).getAttribute("href")); } /*try { Thread.sleep(1000L); } catch (InterruptedException e) { Console.error("Thread sleep Error"); }*/ } // 获取内容列表 /*List<WebElement> textParentEle = driver.findElements(By.xpath(contentExpression)); for (WebElement textChildEle : textParentEle) { WebElement childEleElement = textChildEle.findElement(By.className("t")); boolean isEmptyForText = StrUtil.isEmpty(childEleElement.getText()); if (isEmptyForText) { continue; } Console.log(StrUtil.format("\t[{}]\t[{}]\r\n", DateUtil.now(), textChildEle.getText())); }*/ }}其中相关于XPath知识请 进入查看Xpath相关:https://blog.csdn.net/u011541946/article/details/73323911https://blog.csdn.net/u011541946/article/details/67639423