/*
  FindLinks.java
  David Klick
  CIS 260
  2011-06-21

  Uses regular expressions to find links in web pages.
*/

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

/**
 * Command-line tool that downloads one or more web pages and prints the
 * target of every {@code <a href=...>} link found in each page.
 *
 * <p>URLs are taken from the command-line arguments; when none are given the
 * user is prompted for a whitespace-separated list on standard input.
 */
public class FindLinks {
    /**
     * Matches the opening part of an anchor tag up to and including its
     * {@code href} attribute value. The value is captured in group 1
     * (double-quoted), group 2 (single-quoted) or group 3 (unquoted).
     *
     * <p>NOTE(review): the pattern literal in the original source was empty,
     * which matched the empty string at every position of the page. This
     * expression is a reconstruction of the evident intent (finding links);
     * the program now prints the href target rather than the whole match.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "<a\\b[^>]*?\\shref\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s>]+))",
            Pattern.CASE_INSENSITIVE);

    /** Matches a leading URL scheme such as {@code http://} or {@code https://}. */
    private static final Pattern SCHEME_PATTERN = Pattern.compile(
            "^[a-z][a-z0-9+.-]*://", Pattern.CASE_INSENSITIVE);

    /**
     * Program entry point.
     *
     * @param args URLs to scan; if empty, the URLs are read from standard input
     */
    public static void main(String[] args) {
        String[] urls = args;
        if (args.length == 0) {
            urls = promptForUrls();
            if (urls.length == 0) {
                System.out.println("No URLs to process. Bye.");
                return;
            }
        }

        for (String url : urls) {
            if (url.trim().isEmpty()) {
                continue; // nothing to fetch for a blank argument
            }
            printLinks(withScheme(url.trim()));
        }
    }

    /**
     * Prompts on standard output and reads one line of whitespace-separated
     * URLs from standard input.
     *
     * @return the URLs entered, or an empty array if the line was blank or
     *         standard input was already at end of file
     */
    private static String[] promptForUrls() {
        // Deliberately not closed: closing the Scanner would close System.in.
        Scanner in = new Scanner(System.in);
        System.out.print("Enter a space separated list of URLs: ");
        if (!in.hasNextLine()) {
            return new String[0];
        }
        // Trim first: "".split("\\s+") yields [""] and a leading blank yields
        // an empty first token, neither of which is a usable URL.
        String line = in.nextLine().trim();
        return line.isEmpty() ? new String[0] : line.split("\\s+");
    }

    /**
     * Returns {@code url} unchanged if it already names a scheme, otherwise
     * prefixes it with {@code http://}.
     */
    private static String withScheme(String url) {
        return SCHEME_PATTERN.matcher(url).find() ? url : "http://" + url;
    }

    /**
     * Downloads the page at {@code url} and prints a numbered list of the
     * links it contains, or an error line if the page cannot be read.
     */
    private static void printLinks(String url) {
        InputStream stream = null;
        try {
            stream = new URL(url).openStream();
            // Decode explicitly as UTF-8 instead of the platform default so
            // results do not vary from machine to machine.
            String page = readAll(new InputStreamReader(stream, "UTF-8"));

            System.out.println("\nLinks found in " + url);
            int matchNum = 0;
            for (String link : extractLinks(page)) {
                System.out.printf("%3d: %s\n", ++matchNum, link);
            }
            if (matchNum == 0) {
                System.out.println("No links found on page.");
            }
        } catch (IOException e) {
            // Covers malformed URLs as well as connection and read failures.
            System.out.println("\nError connecting to " + url);
        } finally {
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Best effort: the page has already been processed or reported.
                }
            }
        }
    }

    /** Reads {@code reader} to exhaustion and returns everything read. */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[8192];
        int count;
        while ((count = reader.read(buffer)) != -1) {
            text.append(buffer, 0, count);
        }
        return text.toString();
    }

    /**
     * Returns the href target of every anchor tag in {@code html}, in
     * document order.
     */
    private static List<String> extractLinks(CharSequence html) {
        List<String> links = new ArrayList<String>();
        Matcher matcher = LINK_PATTERN.matcher(html);
        while (matcher.find()) {
            // Exactly one of the three alternatives participates in a match.
            String target = matcher.group(1);
            if (target == null) {
                target = matcher.group(2);
            }
            if (target == null) {
                target = matcher.group(3);
            }
            links.add(target);
        }
        return links;
    }
}