How to identify broken links in Selenium WebDriver: Java Example


import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
public class App {
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver", "D:\\ChromeDriver\\chromedriver.exe");

        // Initialize the Chrome Driver
        WebDriver driver = new ChromeDriver();
        System.out.println("Before openinig url");
        //Navigate to the webpage
        driver.get("https://www.google.com");

        // Find All links on the page
        List<WebElement> links = driver.findElements(By.tagName("a"));
        for(WebElement link : links) {
            String url = link.getAttribute("href");
           
            // Now check link is missing.
            if (url.isEmpty() )
                System.out.println("Missing link ");
            else {
            //check link is invalid or broken"
                try {
                    URL linkUrl = new URL(null, url);
                    HttpURLConnection httpURLConnection = (HttpURLConnection) linkUrl.openConnection();
                    httpURLConnection.setConnectTimeout(5000);
                    httpURLConnection.connect();
                    if (httpURLConnection.getResponseCode() >= 400)
                        System.out.println(url + "is a brokenb link");
                    else
                        System.out.println(url + " is a valid link");
                }
                catch (Exception e) {
                    System.out.println(url + "is broken link");
                }

            }
        }
        //Close Driver
        driver.quit();
    }
}

The code uses Selenium WebDriver to navigate to the web page and find all the links on it. It then iterates over the links and checks whether each link's URL is empty. If the URL is not empty, the code opens an HTTP connection to it and checks the response code. A response code of 400 or higher (or a failure to connect at all) indicates that the link is broken; otherwise the link is valid. In both cases the URL is printed together with the result.

Comments

Popular posts from this blog

Selenium: File download handling.

Major Differences between Python and Java

Cypress V/S Selenium