javatools.wget
Class Wget

java.lang.Object
  |
  +--javatools.wget.Wget

public class Wget
extends java.lang.Object


Nested Class Summary
static class Wget.El
           
 
Field Summary
static Wget.El A_HREF
           
(package private)  java.util.Set alreadyProcessedURLs
           
static Wget.El APPLET_CODE
           
static Wget.El AREA_HREF
           
static Wget.El BGSOUND_SRC
           
static Wget.El BODY_BACKGROUND
           
static Wget.El EMBED_HREF
           
static Wget.El EMBED_SRC
           
(package private)  java.util.List errorListeners
           
static Wget.El FIG_SRC
           
 java.util.Map followTags
           
static Wget.El FRAME_SRC
           
static java.lang.String HTML_TYPE
           
static Wget.El IFRAME_SRC
           
static Wget.El IMG_HREF
           
static Wget.El IMG_LOWSRC
           
static Wget.El IMG_SRC
           
static Wget.El INPUT_SRC
           
static Wget.El LAYER_SRC
           
protected static java.util.regex.Pattern metaRefresh
           
static Wget.El OVERLAY_SRC
           
(package private)  WgetProcessUrl processUrl
           
static Wget.El SCRIPT_SRC
           
(package private)  boolean spiderURLs
           
static Wget.El TABLE_BACKGROUND
           
static Wget.El TD_BACKGROUND
           
static Wget.El TH_BACKGROUND
           
(package private)  WgetURLValidator urlValidator
           
 
Constructor Summary
Wget(WgetProcessUrl processUrl)
           
 
Method Summary
 void addAllTags()
           
static void addAllTags(java.util.Map followTags)
           
(package private)  void addAlreadyProcessed(java.net.URL url)
           
 void addErrorListener(WgetErrorListener listener)
           
 void addFollowTag(Wget.El el)
           
static void addFollowTag(Wget.El el, java.util.Map followTags)
           
(package private)  java.lang.String connectionToMimeType(java.net.HttpURLConnection hconnect)
           
 void doProcessUrl(java.net.URLConnection connection)
           
 void doSpiderURLs(java.net.HttpURLConnection connection)
           
(package private)  void error(java.net.URL url, java.lang.String urlString, java.lang.Exception e)
           
(package private)  java.util.List followURL(org.jdom.Element element)
           
(package private)  java.util.List getListOfLinks(org.jdom.Element element)
          Given a tree of XHTML, returns a list of the URLs (as Strings) contained in the entire XML tree.
(package private)  void getListOfLinksHelper(org.jdom.Element element, java.util.List rtn)
          Recursive helper function for getListOfLinks.
static java.lang.String getMetaRefreshHeader(org.jdom.Element element)
           
static java.lang.String getMetaRefreshUrl(org.jdom.Element element)
           
 WgetURLValidator getUrlValidator()
           
(package private)  boolean isAlreadyProcessed(java.net.URL url)
           
static boolean isMetaRefresh(org.jdom.Element element)
           
static boolean isRedirected(java.net.URLConnection connection)
           
 boolean isSpiderURLs()
           
static void main(java.lang.String[] args)
           
 java.net.URLConnection openConnection(java.net.URL url)
           
 void recursiveGet(java.net.URL url)
           
 void removeAllTags()
           
 void removeFollowTag(Wget.El el)
           
 void setSpiderURLs(boolean v)
           
 void setUrlValidator(WgetURLValidator urlValidator)
           
(package private)  org.jdom.Element streamToElement(java.io.InputStream is)
          Converts an InputStream into a JDOM tree of XHTML.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

HTML_TYPE

public static final java.lang.String HTML_TYPE
See Also:
Constant Field Values

alreadyProcessedURLs

java.util.Set alreadyProcessedURLs

processUrl

WgetProcessUrl processUrl

urlValidator

WgetURLValidator urlValidator

errorListeners

java.util.List errorListeners

spiderURLs

boolean spiderURLs

followTags

public java.util.Map followTags

metaRefresh

protected static java.util.regex.Pattern metaRefresh

A_HREF

public static final Wget.El A_HREF

APPLET_CODE

public static final Wget.El APPLET_CODE

AREA_HREF

public static final Wget.El AREA_HREF

BGSOUND_SRC

public static final Wget.El BGSOUND_SRC

BODY_BACKGROUND

public static final Wget.El BODY_BACKGROUND

EMBED_HREF

public static final Wget.El EMBED_HREF

EMBED_SRC

public static final Wget.El EMBED_SRC

FIG_SRC

public static final Wget.El FIG_SRC

FRAME_SRC

public static final Wget.El FRAME_SRC

IFRAME_SRC

public static final Wget.El IFRAME_SRC

IMG_HREF

public static final Wget.El IMG_HREF

IMG_LOWSRC

public static final Wget.El IMG_LOWSRC

IMG_SRC

public static final Wget.El IMG_SRC

INPUT_SRC

public static final Wget.El INPUT_SRC

LAYER_SRC

public static final Wget.El LAYER_SRC

OVERLAY_SRC

public static final Wget.El OVERLAY_SRC

SCRIPT_SRC

public static final Wget.El SCRIPT_SRC

TABLE_BACKGROUND

public static final Wget.El TABLE_BACKGROUND

TD_BACKGROUND

public static final Wget.El TD_BACKGROUND

TH_BACKGROUND

public static final Wget.El TH_BACKGROUND
Constructor Detail

Wget

public Wget(WgetProcessUrl processUrl)
Method Detail

addAllTags

public void addAllTags()

addAllTags

public static void addAllTags(java.util.Map followTags)

removeAllTags

public void removeAllTags()

removeFollowTag

public void removeFollowTag(Wget.El el)

addFollowTag

public void addFollowTag(Wget.El el)

addFollowTag

public static void addFollowTag(Wget.El el,
                                java.util.Map followTags)

addErrorListener

public void addErrorListener(WgetErrorListener listener)

main

public static void main(java.lang.String[] args)
                 throws WgetException
Throws:
WgetException

getListOfLinks

java.util.List getListOfLinks(org.jdom.Element element)
Given a tree of XHTML, returns a list of the URLs (as Strings) contained in the entire XML tree.

Parameters:
element - the root element of the XHTML tree to search for URLs
Returns:
List of URLs as Strings

isMetaRefresh

public static boolean isMetaRefresh(org.jdom.Element element)

getMetaRefreshHeader

public static java.lang.String getMetaRefreshHeader(org.jdom.Element element)

getMetaRefreshUrl

public static java.lang.String getMetaRefreshUrl(org.jdom.Element element)

followURL

java.util.List followURL(org.jdom.Element element)
Parameters:
element -
Returns:
List of the URL(s) to follow

getListOfLinksHelper

void getListOfLinksHelper(org.jdom.Element element,
                          java.util.List rtn)
Recursive helper function for getListOfLinks.

Parameters:
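element - the element to examine (presumably the current node of the XHTML tree being traversed)
rtn - the list passed through the recursion (presumably the list that collects the links found)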
See Also:
getListOfLinks

connectionToMimeType

java.lang.String connectionToMimeType(java.net.HttpURLConnection hconnect)

openConnection

public java.net.URLConnection openConnection(java.net.URL url)
                                      throws java.io.IOException
Throws:
java.io.IOException

recursiveGet

public void recursiveGet(java.net.URL url)
                  throws WgetException,
                         java.io.IOException
Throws:
WgetException
java.io.IOException

doSpiderURLs

public void doSpiderURLs(java.net.HttpURLConnection connection)
                  throws java.io.IOException,
                         WgetException
Throws:
java.io.IOException
WgetException

doProcessUrl

public void doProcessUrl(java.net.URLConnection connection)
                  throws java.io.IOException,
                         WgetException
Throws:
java.io.IOException
WgetException

error

void error(java.net.URL url,
           java.lang.String urlString,
           java.lang.Exception e)
     throws WgetException
Throws:
WgetException

streamToElement

org.jdom.Element streamToElement(java.io.InputStream is)
                           throws java.io.IOException,
                                  java.io.FileNotFoundException,
                                  org.jdom.JDOMException,
                                  WgetException
Converts an InputStream into a JDOM tree of XHTML.

Parameters:
is - An InputStream
Returns:
a JDOM tree.
Throws:
java.io.IOException
java.io.FileNotFoundException
org.jdom.JDOMException
WgetException

isRedirected

public static boolean isRedirected(java.net.URLConnection connection)
                            throws java.io.IOException
Throws:
java.io.IOException

isAlreadyProcessed

boolean isAlreadyProcessed(java.net.URL url)

addAlreadyProcessed

void addAlreadyProcessed(java.net.URL url)

getUrlValidator

public WgetURLValidator getUrlValidator()

setUrlValidator

public void setUrlValidator(WgetURLValidator urlValidator)

isSpiderURLs

public boolean isSpiderURLs()

setSpiderURLs

public void setSpiderURLs(boolean v)