Java使用Chrome驱动做爬虫以及示例

开始准备Chromechromedriver驱动(下载不同版本浏览器对应驱动教程)代码pom依赖:<dependencies>    <!--添加浏览器驱动-->    <dependency>        <groupId>org.seleniumhq.selenium</groupId>        <artifactId>selenium-server</artifactId>        <version>3.141.59</version>    </dependency>    <dependency>        <groupId>cn.hutool</groupId>        <artifactId>hutool-all</artifactId>        <version>4.5.6</version>    </dependency></dependencies>Main代码:import cn.hutool.core.date.DateUtil;import cn.hutool.core.lang.Console;import cn.hutool.core.util.StrUtil;import org.openqa.selenium.By;import org.openqa.selenium.WebDriver;import org.openqa.selenium.WebElement;import org.openqa.selenium.chrome.ChromeDriverService;import org.openqa.selenium.remote.DesiredCapabilities;import org.openqa.selenium.remote.RemoteWebDriver;import java.io.File;import java.io.IOException;import java.util.ArrayList;public class LagouSpider {    private static ArrayList<String> strings = new ArrayList<>();    public static void main(String[] args) {        String webDriverPath = LagouSpider.class.getResource("chromedriver.exe").getPath();        // 这里需要注意一定要和打开的Chrome版本匹配        System.setProperty("webdriver.chrome.driver", webDriverPath);        // 构建驱动        ChromeDriverService service = new ChromeDriverService.Builder().                
usingDriverExecutable(new File(webDriverPath)).usingAnyFreePort().build();        try {            service.start();        } catch (IOException e) {            e.printStackTrace();        }        // 获取Web驱动        WebDriver driver = new RemoteWebDriver(service.getUrl(), DesiredCapabilities.chrome());        String url = "https://tophub.today/";        // 访问页面        driver.get(url);        for (int i = 0; i < 200; i++) {            for (int j = 0; j <= 10; j++) {                try {                    run(driver, webDriverPath, url, i, j);                } catch (Exception e) {                    continue;                }            }        }        Console.log(strings);        // 退出驱动线程        driver.quit();        // 关闭service服务        service.stop();    }    public static void run(WebDriver driver, String webDriverPath, String url, int i, int j) {        String titleExpression =                "//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-ih')]/div[contains(@class, 'cc-cd-is')]/a/div[contains(@class, 'cc-cd-lb')]";        String contentExpression =                "//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-cb nano has-scrollbar')]/div[contains(@class, 'cc-cd-cb-l nano-content')]/a[" + j + "]/div[contains(@class, 'cc-cd-cb-ll')]/span[contains(@class, 't')]";        // 获取标题        WebElement titleElement = driver.findElement(By.xpath(titleExpression));        String titleElementText = titleElement.getText();        if (StrUtil.isNotEmpty(titleElementText)) {            boolean hasStr = -1 == strings.indexOf(titleElementText);            if (hasStr) {                strings.add(titleElementText);                Console.log(StrUtil.format("[{}]",                        titleElementText));            }            WebElement textChildEle = driver.findElement(By.xpath(contentExpression));            if (StrUtil.isNotEmpty(textChildEle.getText())) {                Console.log(StrUtil.format("\t[{}]\t[{}]({})",                        
DateUtil.now(),                        textChildEle.getText()),                        textChildEle.findElement(By.xpath("//div[@id='node-" + i + "']/div/div[contains(@class, 'cc-cd-cb nano has-scrollbar')]/div[contains(@class, 'cc-cd-cb-l nano-content')]/a[" + j + "]")).getAttribute("href"));            }            /*try {                Thread.sleep(1000L);            } catch (InterruptedException e) {                Console.error("Thread sleep Error");            }*/        }        // 获取内容列表        /*List<WebElement> textParentEle = driver.findElements(By.xpath(contentExpression));        for (WebElement textChildEle : textParentEle) {            WebElement childEleElement = textChildEle.findElement(By.className("t"));            boolean isEmptyForText = StrUtil.isEmpty(childEleElement.getText());            if (isEmptyForText) {                continue;            }            Console.log(StrUtil.format("\t[{}]\t[{}]\r\n",                    DateUtil.now(),                    textChildEle.getText()));        }*/    }}其中相关于XPath知识请 进入查看Xpath相关:https://blog.csdn.net/u011541946/article/details/73323911https://blog.csdn.net/u011541946/article/details/67639423