javatools.wget
Class Wget
java.lang.Object
|
+--javatools.wget.Wget
- public class Wget
- extends java.lang.Object
Nested Class Summary |
static class |
Wget.El
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
HTML_TYPE
public static final java.lang.String HTML_TYPE
- See Also:
- Constant Field Values
alreadyProcessedURLs
java.util.Set alreadyProcessedURLs
processUrl
WgetProcessUrl processUrl
urlValidator
WgetURLValidator urlValidator
errorListeners
java.util.List errorListeners
spiderURLs
boolean spiderURLs
followTags
public java.util.Map followTags
metaRefresh
protected static java.util.regex.Pattern metaRefresh
A_HREF
public static final Wget.El A_HREF
APPLET_CODE
public static final Wget.El APPLET_CODE
AREA_HREF
public static final Wget.El AREA_HREF
BGSOUND_SRC
public static final Wget.El BGSOUND_SRC
BODY_BACKGROUND
public static final Wget.El BODY_BACKGROUND
EMBED_HREF
public static final Wget.El EMBED_HREF
EMBED_SRC
public static final Wget.El EMBED_SRC
FIG_SRC
public static final Wget.El FIG_SRC
FRAME_SRC
public static final Wget.El FRAME_SRC
IFRAME_SRC
public static final Wget.El IFRAME_SRC
IMG_HREF
public static final Wget.El IMG_HREF
IMG_LOWSRC
public static final Wget.El IMG_LOWSRC
IMG_SRC
public static final Wget.El IMG_SRC
INPUT_SRC
public static final Wget.El INPUT_SRC
LAYER_SRC
public static final Wget.El LAYER_SRC
OVERLAY_SRC
public static final Wget.El OVERLAY_SRC
SCRIPT_SRC
public static final Wget.El SCRIPT_SRC
TABLE_BACKGROUND
public static final Wget.El TABLE_BACKGROUND
TD_BACKGROUND
public static final Wget.El TD_BACKGROUND
TH_BACKGROUND
public static final Wget.El TH_BACKGROUND
Wget
public Wget(WgetProcessUrl processUrl)
addAllTags
public void addAllTags()
addAllTags
public static void addAllTags(java.util.Map followTags)
removeAllTags
public void removeAllTags()
removeFollowTag
public void removeFollowTag(Wget.El el)
addFollowTag
public void addFollowTag(Wget.El el)
addFollowTag
public static void addFollowTag(Wget.El el,
java.util.Map followTags)
addErrorListener
public void addErrorListener(WgetErrorListener listener)
main
public static void main(java.lang.String[] args)
throws WgetException
WgetException
getListOfLinks
java.util.List getListOfLinks(org.jdom.Element element)
- Given a tree of XHTML, returns a list of the URLs (as Strings) contained in
the entire XML tree.
- Parameters:
element
-
- Returns:
- List of URLs as Strings
isMetaRefresh
public static boolean isMetaRefresh(org.jdom.Element element)
getMetaRefreshHeader
public static java.lang.String getMetaRefreshHeader(org.jdom.Element element)
getMetaRefreshUrl
public static java.lang.String getMetaRefreshUrl(org.jdom.Element element)
followURL
java.util.List followURL(org.jdom.Element element)
- Parameters:
element
-
- Returns:
- The URL(s) to follow, returned as a List
getListOfLinksHelper
void getListOfLinksHelper(org.jdom.Element element,
java.util.List rtn)
- Recursive helper function for getListOfLinks.
- Parameters:
element
-
rtn
-
- See Also:
getListOfLinks
connectionToMimeType
java.lang.String connectionToMimeType(java.net.HttpURLConnection hconnect)
openConnection
public java.net.URLConnection openConnection(java.net.URL url)
throws java.io.IOException
java.io.IOException
recursiveGet
public void recursiveGet(java.net.URL url)
throws WgetException,
java.io.IOException
WgetException
java.io.IOException
doSpiderURLs
public void doSpiderURLs(java.net.HttpURLConnection connection)
throws java.io.IOException,
WgetException
java.io.IOException
WgetException
doProcessUrl
public void doProcessUrl(java.net.URLConnection connection)
throws java.io.IOException,
WgetException
java.io.IOException
WgetException
error
void error(java.net.URL url,
java.lang.String urlString,
java.lang.Exception e)
throws WgetException
WgetException
streamToElement
org.jdom.Element streamToElement(java.io.InputStream is)
throws java.io.IOException,
java.io.FileNotFoundException,
org.jdom.JDOMException,
WgetException
- Converts an InputStream into a JDOM tree of XHTML.
- Parameters:
is
- An InputStream
- Returns:
- A JDOM tree.
- Throws:
java.io.IOException
java.io.FileNotFoundException
org.jdom.JDOMException
WgetException
isRedirected
public static boolean isRedirected(java.net.URLConnection connection)
throws java.io.IOException
java.io.IOException
isAlreadyProcessed
boolean isAlreadyProcessed(java.net.URL url)
addAlreadyProcessed
void addAlreadyProcessed(java.net.URL url)
getUrlValidator
public WgetURLValidator getUrlValidator()
setUrlValidator
public void setUrlValidator(WgetURLValidator urlValidator)
isSpiderURLs
public boolean isSpiderURLs()
setSpiderURLs
public void setSpiderURLs(boolean v)