wget.java

Programming 2008.03.17 13:21

org.apache.lenya.net
Class WGet

java.lang.Object
  extended by org.apache.lenya.net.WGet

public class WGet
extends java.lang.Object

Similar to the UNIX wget


Constructor Summary
WGet()
Creates a new WGet object.

Method Summary
java.lang.String createFileName(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
byte[] download(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
byte[] downloadUsingHttpClient(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
DOCUMENT ME!
java.lang.String escapeSlashes(java.lang.String string)
Escape slashes
java.util.List getLinks(java.net.URL url)
byte[] getResource(java.net.URL url)
static void main(java.lang.String[] args)
DOCUMENT ME!
byte[] runProcess(java.lang.String command)
void saveToFile(java.lang.String filename, byte[] bytes)
void setDirectoryPrefix(java.lang.String directory_prefix)
-P
void substitutePrefix(java.lang.String filename, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
Substitute prefix, e.g.
java.lang.String toString()
DOCUMENT ME!

Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait

Constructor Detail

WGet

public WGet()
Creates a new WGet object.
Method Detail

main

public static void main(java.lang.String[] args)
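Command-line entry point. Prints a usage message when called without arguments; otherwise downloads the given URL, honouring an optional -P directory prefix.
Parameters:
args - The URL to download and, optionally, -P immediately followed by the target directory (e.g. -P/home/lenya/download)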

setDirectoryPrefix

public void setDirectoryPrefix(java.lang.String directory_prefix)
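Sets the directory prefix (the counterpart of wget's -P option).
Parameters:
directory_prefix - Directory below which downloaded files are saved; defaults to the user.dir system property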

download

public byte[] download(java.net.URL url,
                       java.lang.String prefixSubstitute,
                       java.lang.String substituteReplacement)
                throws java.io.IOException
Parameters:
url - The url of the resource to download
prefixSubstitute - Regexp which shall be replaced
substituteReplacement - Replacement of the regexp
Returns:
bytes of downloaded resource
Throws:
java.io.IOException - URL might not exist

downloadUsingHttpClient

public byte[] downloadUsingHttpClient(java.net.URL url,
                                      java.lang.String prefixSubstitute,
                                      java.lang.String substituteReplacement)
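Downloads the resource behind the URL, saves it below the directory prefix, applies the prefix substitution to the saved file, and then downloads every image and link target found in it. Failures are logged rather than thrown.
Parameters:
url - The url of the resource to download
prefixSubstitute - Regexp which shall be replaced
substituteReplacement - Replacement of the regexp
Returns:
bytes of the downloaded resource, or null if the download failed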

getResource

public byte[] getResource(java.net.URL url)
                   throws java.io.IOException
Throws:
java.io.IOException

getLinks

public java.util.List getLinks(java.net.URL url)
                        throws java.io.IOException
Throws:
java.io.IOException

substitutePrefix

public void substitutePrefix(java.lang.String filename,
                             java.lang.String prefixSubstitute,
                             java.lang.String substituteReplacement)
                      throws java.io.IOException
Substitute prefix, e.g. "/lenya/blog/live/" by "/"
Parameters:
filename - Filename
prefixSubstitute - Prefix which shall be replaced
substituteReplacement - Prefix which is going to replace the original
Throws:
java.io.IOException - DOCUMENT ME!

escapeSlashes

public java.lang.String escapeSlashes(java.lang.String string)
Escape slashes
Returns:
String with escaped slashes

toString

public java.lang.String toString()
DOCUMENT ME!
Overrides:
toString in class java.lang.Object
Returns:
DOCUMENT ME!

saveToFile

public void saveToFile(java.lang.String filename,
                       byte[] bytes)
                throws java.io.FileNotFoundException,
                       java.io.IOException
Throws:
java.io.FileNotFoundException
java.io.IOException

createFileName

public java.lang.String createFileName(java.net.URL url,
                                       java.lang.String prefixSubstitute,
                                       java.lang.String substituteReplacement)
Parameters:
url - URL of resource, which has been downloaded and shall be saved
Returns:
Absolute substituted filename

runProcess

public byte[] runProcess(java.lang.String command)
                  throws java.lang.Exception
Throws:
java.lang.Exception

Copyright © 1999-2005 Apache Software Foundation. All Rights Reserved.

소스코드

  1. /*
    * Copyright 1999-2004 The Apache Software Foundation
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    * http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    *
    */
  2. /* $Id: WGet.java,v 1.32 2004/03/01 16:18:25 gregor Exp $ */
  3. package org.apache.lenya.net;
  4. import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.Iterator;
    import java.util.List;
  5. import org.apache.log4j.Category;

  6. /**
    * Similar to the UNIX wget
    */
    public class WGet {
    static Category log = Category.getInstance(WGet.class);
    String directory_prefix = null;
  7. /**
    * Creates a new WGet object.
    */
    public WGet() {
    directory_prefix = System.getProperty("user.dir");
    }
  8. /**
    * DOCUMENT ME!
    *
    * @param args DOCUMENT ME!
    */
    public static void main(String[] args) {
    if (args.length == 0) {
    System.out.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
  9. return;
    }
  10. try {
    WGet wget = new WGet();
  11. for (int i = 0; i < args.length; i++) {
    if (args[i].indexOf("-P") == 0) {
    wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download, 2: remove "-P"
    }
    }
  12. byte[] response = wget.download(new URL(args[0]), "s/\\/lenya\\/oscom//g", "");
    } catch (MalformedURLException e) {
    System.err.println(e);
    } catch (Exception e) {
    System.err.println(e);
    }
    }
  13. /**
    * -P
    *
    * @param directory_prefix DOCUMENT ME!
    */
    public void setDirectoryPrefix(String directory_prefix) {
    this.directory_prefix = directory_prefix;
    }
  14. /**
    * @param url The url of the resource to download
    * @param prefixSubstitute Regexp which shall be replaced
    * @param substituteReplacement Replacement of the regexp
    *
    * @return bytes of downloaded resource
    *
    * @throws IOException URL might not exist
    */
    public byte[] download(URL url, String prefixSubstitute, String substituteReplacement)
    throws IOException {
    log.debug(".download(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
  15. return downloadUsingHttpClient(url, prefixSubstitute, substituteReplacement);
    }
  16. /**
    * DOCUMENT ME!
    *
    * @param url DOCUMENT ME!
    * @param prefixSubstitute DOCUMENT ME!
    *
    * @return DOCUMENT ME!
    */
    public byte[] downloadUsingHttpClient(URL url, String prefixSubstitute,
    String substituteReplacement) {
    log.debug(".downloadUsingHttpClient(): " + url);
  17. byte[] sresponse = null;
  18. try {
    sresponse = getResource(url);
  19. File file = new File(createFileName(url, prefixSubstitute, substituteReplacement));
  20. saveToFile(file.getAbsolutePath(), sresponse);
  21. substitutePrefix(file.getAbsolutePath(), prefixSubstitute, substituteReplacement);
    } catch (MalformedURLException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    } catch (FileNotFoundException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    } catch (IOException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    }
  22. List links = null;
  23. try {
    links = getLinks(url);
    } catch (IOException ioe) {
    log.error(".downloadUsingHttpClient(): ", ioe);
    }
  24. if (links != null) {
    Iterator iterator = links.iterator();
  25. while (iterator.hasNext()) {
    String link = (String) iterator.next();
  26. try {
    URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url.toString(),
    link));
  27. byte[] child_sresponse = getResource(child_url);
    saveToFile(createFileName(child_url, prefixSubstitute, substituteReplacement),
    child_sresponse);
    } catch (Exception e) {
    log.error(".downloadUsingHttpClient(): ", e);
    }
    }
    }
  28. return sresponse;
    }
  29. /**
    *
    */
    public byte[] getResource(URL url) throws IOException {
    log.debug(".getResource(): " + url);
  30. HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
    InputStream in = httpConnection.getInputStream();
    byte[] buffer = new byte[1024];
    int bytes_read;
    ByteArrayOutputStream bufferOut = new ByteArrayOutputStream();
  31. while ((bytes_read = in.read(buffer)) != -1) {
    bufferOut.write(buffer, 0, bytes_read);
    }
  32. byte[] sresponse = bufferOut.toByteArray();
    httpConnection.disconnect();
  33. return sresponse;
    }
  34. /**
    *
    */
    public List getLinks(URL url) throws IOException {
    log.debug(".getLinks(): Get links from " + url);
  35. List links = null;
  36. try {
    org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(url.toString());
    links = html.getImageSrcs(false);
    links.addAll(html.getLinkHRefs(false));
    } catch (Exception e) {
    log.error(".getLinks() Exception 423432: ", e);
    }
  37. if (links != null) {
    log.debug(".getLinks(): Number of links found: " + links.size());
    }
  38. return links;
    }
  39. /**
    * Substitute prefix, e.g. "/lenya/blog/live/" by "/"
    *
    * @param filename Filename
    * @param prefixSubstitute Prefix which shall be replaced
    * @param substituteReplacement Prefix which is going to replace the original
    *
    * @throws IOException DOCUMENT ME!
    */
    public void substitutePrefix(String filename, String prefixSubstitute, String substituteReplacement) throws IOException {
    log.debug("Replace " + prefixSubstitute + " by " + substituteReplacement);
  40. org.apache.lenya.util.SED.replaceAll(new File(filename), escapeSlashes(prefixSubstitute), escapeSlashes(substituteReplacement));
    }
  41. /**
    * Escape slashes
    *
    * @return String with escaped slashes
    */
    public String escapeSlashes(String string) {
    StringBuffer buffer = new StringBuffer("");
  42. for (int i = 0; i < string.length(); i++) {
    if (string.charAt(i) == '/') {
    buffer.append("\\/");
    } else {
    buffer.append(string.charAt(i));
    }
    }
  43. return buffer.toString();
    }
  44. /**
    * DOCUMENT ME!
    *
    * @return DOCUMENT ME!
    */
    public String toString() {
    return "-P: " + directory_prefix;
    }
  45. /**
    *
    */
    public void saveToFile(String filename, byte[] bytes)
    throws FileNotFoundException, IOException {
    File file = new File(filename);
    File parent = new File(file.getParent());
  46. if (!parent.exists()) {
    log.warn(".saveToFile(): Directory will be created: " + parent.getAbsolutePath());
    parent.mkdirs();
    }
  47. FileOutputStream out = new FileOutputStream(file.getAbsolutePath());
    out.write(bytes);
    out.close();
    }
  48. /**
    * @param url URL of resource, which has been downloaded and shall be saved
    * @return Absolute substituted filename
    */
    public String createFileName(URL url, String prefixSubstitute, String substituteReplacement) {
    File file = new File(directory_prefix + File.separator + url.getFile());
  49. return file.getAbsolutePath().replaceAll(prefixSubstitute, substituteReplacement);
    }
  50. /**
    *
    */
    public byte[] runProcess(String command) throws Exception {
    Process process = Runtime.getRuntime().exec(command);
  51. java.io.InputStream in = process.getInputStream();
    byte[] buffer = new byte[1024];
    int bytes_read = 0;
    java.io.ByteArrayOutputStream baout = new java.io.ByteArrayOutputStream();
  52. while ((bytes_read = in.read(buffer)) != -1) {
    baout.write(buffer, 0, bytes_read);
    }
  53. if (baout.toString().length() > 0) {
    log.debug(".runProcess(): %%%InputStream:START" + baout.toString() +
    "END:InputStream%%%");
    }
  54. java.io.InputStream in_e = process.getErrorStream();
    java.io.ByteArrayOutputStream baout_e = new java.io.ByteArrayOutputStream();
  55. while ((bytes_read = in_e.read(buffer)) != -1) {
    baout_e.write(buffer, 0, bytes_read);
    }
  56. if (baout_e.toString().length() > 0) {
    log.error(".runProcess(): ###ErrorStream:START" + baout_e.toString() +
    "END:ErrorStream###");
    }
  57. return baout.toByteArray();
    }
    }

이 글은 스프링노트에서 작성되었습니다.

신고
크리에이티브 커먼즈 라이선스
Creative Commons License

'Programming' 카테고리의 다른 글

Quartz properties  (0) 2008.05.27
SpringFramework + Quartz  (0) 2008.04.28
wget.java  (0) 2008.03.17
simple java wget  (0) 2008.03.17

simple java wget

Programming 2008.03.17 13:21

Java로 wget을 구현하기 위해서 열심히 알아본 결과 아래와 같은 소스를 구했다!!+_+

 

URL 객체를 이용하여 wget 구현하기! 오오~~+_+

원본글 : http://www.koders.com/java/fidB84D3CA3F6C5603C64C112C6CF0A659FF11AF014.aspx?s=wget


  1. import java.io.IOException;
    import java.net.URL;
    import java.net.URLConnection;
  /**
   * A small 'wget' written with nothing but <code>java.net.URL</code>:
   * downloads single URLs, or the files enumerated in a download list, into
   * local directories. Usable from the command line ({@link #main}) or as a
   * library ({@link #wget(String, String, boolean, boolean)}).
   */
  public class Wget
  {
      //
      // CONSTANTS & co

      /**
       * How many bytes should get read at once ?
       */
      public final static int READ_BUFFER_SIZE = 2048;

      /** Name and version line printed by the usage message. */
      public final static String BANNER
          = "OpenWFE Wget 0.0.5 - simple java wget";

      /**
       * After this many consecutive reads that deliver no data, Wget gives up
       * on the download and keeps what it got so far.
       */
      public final static int MAX_RETRIES = 42;

      //
      // STATIC METHODS

      /**
       * Same as wget(d, u, false, false): no freshness check, verbosity off.
       *
       * @param downloadDir directory the file is saved into
       * @param sUrl URL of the resource to download
       * @return true if the downloading actually happened
       * @throws IOException if connecting, reading or writing fails
       */
      public static boolean wget
          (final String downloadDir, final String sUrl)
      throws IOException
      {
          return wget(downloadDir, sUrl, false, false);
      }

      /**
       * Downloads the resource behind sUrl into downloadDir. The local file is
       * named after whatever follows the last '/' of the URL.
       * <br>
       * This method is public so that applications may use it (as a library).
       *
       * @param downloadDir directory the file is saved into; a trailing file
       *        separator is added when missing
       * @param sUrl URL of the resource to download
       * @param head when true, nothing is downloaded if the local file exists,
       *        the remote last-modified date is known and the local file is at
       *        least as recent
       * @param verbose when true, progress is reported on standard output
       * @return true if the downloading actually happened
       * @throws IOException if connecting, reading or writing fails
       */
      public static boolean wget
          (String downloadDir,
           final String sUrl,
           final boolean head,
           final boolean verbose) throws IOException
      {
          if ( ! downloadDir.endsWith(java.io.File.separator))
              downloadDir += java.io.File.separator;

          final String fileName =
              downloadDir + sUrl.substring(sUrl.lastIndexOf("/") + 1);

          final java.io.File targetFile = new java.io.File(fileName);

          final URL url = new URL(sUrl);
          final URLConnection con = url.openConnection();

          final long remoteLastModified = con.getLastModified();

          if (head && targetFile.exists())
          {
              if (verbose)
              {
                  System.out.println("..wget() local "+targetFile.lastModified());
                  System.out.println("..wget() remote "+remoteLastModified);
              }

              // 0 means the remote side did not tell its last-modified date.
              if (remoteLastModified != 0 &&
                  targetFile.lastModified() >= remoteLastModified)
              {
                  if (verbose)
                  {
                      System.out.println
                          ("..wget() local file fresher than web version. "+
                           "Not downloading.");
                  }
                  return false;
              }
          }

          if (verbose)
              System.out.println("..wget() will save to "+fileName);

          final int contentLength = con.getContentLength();

          if (verbose)
              System.out.println("..wget() contentLength is "+contentLength);

          // Unknown length: read until the end of the stream. The counters are
          // long so that big downloads are not cut off at Integer.MAX_VALUE.
          final long expectedLength =
              contentLength < 0 ? Long.MAX_VALUE : contentLength;

          java.io.InputStream is = null;
          java.io.FileOutputStream fos = null;

          try
          {
              // Reuse the connection that delivered the headers instead of
              // requesting the resource a second time.
              is = con.getInputStream();
              fos = new java.io.FileOutputStream(fileName);

              final byte[] buffer = new byte[READ_BUFFER_SIZE];

              long totalRead = 0;
              int retries = 0;

              while (totalRead < expectedLength)
              {
                  final int read = is.read(buffer);

                  if (read < 0)
                  {
                      // End of stream: there is nothing left to retry for.
                      if (verbose)
                      {
                          System.out.println
                              ("..wget() end of stream after "+
                               totalRead+" bytes");
                      }
                      break;
                  }

                  if (read == 0)
                  {
                      if (retries >= MAX_RETRIES)
                      {
                          if (verbose)
                              System.out.println("..wget() giving up.");
                          break;
                      }

                      Thread.yield();
                      retries++;
                      continue;
                  }

                  fos.write(buffer, 0, read);
                  totalRead += read;

                  if (verbose)
                  {
                      System.out.println
                          ("..wget() read "+read+
                           " bytes ("+totalRead+"/"+contentLength+") r"+retries);
                  }

                  retries = 0;
              }
          }
          finally
          {
              // Both streams are released even when the transfer failed.
              try
              {
                  if (fos != null) fos.close();
              }
              finally
              {
                  if (is != null) is.close();
              }
          }

          return true;
      }

      /**
       * Creates the directory (and missing parents) unless it already exists.
       *
       * @throws IllegalArgumentException if dir exists but is not a directory
       * @throws IOException if the directory cannot be created
       */
      private static void mkdir (final String dir, final boolean verbose)
      throws Exception
      {
          final java.io.File f = new java.io.File(dir);

          if (f.isDirectory())
          {
              if (verbose)
                  System.out.println("..wget() dir '"+dir+"' already present");
              return;
          }

          if (f.exists())
          {
              throw new IllegalArgumentException
                  ("dir '"+dir+"' already exists and it's not a directory.");
          }

          // mkdirs() also answers false when somebody else created the
          // directory in the meantime, hence the second look.
          if ( ! f.mkdirs() && ! f.isDirectory())
              throw new IOException("could not create dir '"+dir+"'");

          if (verbose)
              System.out.println("..wget() made dir '"+dir+"'");
      }

      /**
       * Reads the file behind the URL, which should be an enumeration
       * of files to further download..
       * <br>
       * example :<br>
       * <pre>
       * #
       * # download list
       *
       * . http://remote.host.tld/images/icon1.png
       * . http://remote.host.tld/content/text2.xml
       * mkdir html
       * mkdir icons
       * html http://other.remote.host.tld/index.htm
       * server http://host1/
       * . index1.htm
       * icons/ icon1.png
       * server http://host2/
       * . index2.htm
       * </pre>
       * <br>
       * 'mkdir' will create a local directory<br>
       * 'server' allows for lighter lists : entries without "://" are
       * prefixed with the server; a 'server' line without value resets it<br>
       * every other line is "{target dir} {URL}"; a line without a space is
       * a URL to download into "."
       *
       * @param downloadListUrl URL of the list; a value without "://" is
       *        taken as the path of a local file
       * @param head passed on to wget() for each entry
       * @param verbose passed on to wget() for each entry
       * @throws Exception if the list cannot be read or an entry fails
       */
      public static void downloadList
          (String downloadListUrl,
           final boolean head,
           final boolean verbose)
      throws
          Exception
      {
          if (downloadListUrl.indexOf("://") < 0)
              downloadListUrl = "file:" + downloadListUrl;

          final URL dUrl = new URL(downloadListUrl);

          final URLConnection con = dUrl.openConnection();

          final java.io.BufferedReader br = new java.io.BufferedReader
              (new java.io.InputStreamReader(con.getInputStream()));

          try
          {
              String server = null;

              for (String line = br.readLine(); line != null; line = br.readLine())
              {
                  line = line.trim();

                  if (line.length() < 1 || line.startsWith("#")) continue;

                  if (line.startsWith("mkdir "))
                  {
                      mkdir(line.substring(6).trim(), verbose);
                      continue;
                  }

                  // Only the word 'server' on its own is the directive; a
                  // target directory like 'servers/' is an ordinary entry.
                  if (line.startsWith("server") &&
                      (line.length() == 6 ||
                       Character.isWhitespace(line.charAt(6))))
                  {
                      if (line.length() == 6)
                          server = null;
                      else
                          server = line.substring(7).trim();

                      if (server != null && ( ! server.endsWith("/")))
                          server += "/";

                      continue;
                  }

                  final int i = line.indexOf(" ");

                  String downloadDir = ".";
                  String urlToDownload = line;

                  if (i > -1)
                  {
                      downloadDir = line.substring(0, i).trim();
                      urlToDownload = line.substring(i+1).trim();
                  }

                  if (server != null && urlToDownload.indexOf("://") < 1)
                  {
                      urlToDownload = server + urlToDownload;
                  }

                  if (verbose)
                  {
                      System.out.println
                          ("..wget() todir >"+downloadDir+
                           "< fromURL >"+urlToDownload+"<");
                  }

                  final boolean b = wget(downloadDir, urlToDownload, head, verbose);

                  // 'x' : downloaded, '.' : skipped because the local copy is fresh
                  if (b)
                      System.out.println("x "+urlToDownload);
                  else
                      System.out.println(". "+urlToDownload);
              }
          }
          finally
          {
              br.close();
          }
      }

      /**
       * Prints the command line help and terminates the JVM with exit
       * code -1; this method does not return.
       */
      private static void printUsage ()
      {
          final String cmd = "java "+Wget.class.getName();

          System.out.println();
          System.out.println(BANNER);
          System.out.println();
          System.out.println("USAGE :");
          System.out.println();
          System.out.print (cmd);
          System.out.println(" [-d {downloadDir}] [-v] [-H] {URL}*");
          System.out.println(" [-v] [-H] -l {URL of a download list}");
          System.out.println();
          System.out.println("Wget is a java 'wget', with not much features.");
          System.out.println();
          System.out.println(" -v : verbose");
          System.out.println(" -h : prints this usage and exits");
          System.out.println(" -H : HEAD, will not download if local resource fresher than web resource");
          System.out.println(" -l : the files and their target dir are enumerated in a list");
          System.out.println(" (behind a URL or within a local file)");
          System.out.println();

          System.exit(-1);
      }

      /**
       * Command line entry point, see printUsage() for the options.
       * Leading arguments starting with '-' are options, the remaining
       * arguments are URLs to download. When a download list is given
       * (-l), only the list is processed and the JVM exits with code 0.
       *
       * @param args the command line arguments
       * @throws Exception if a download fails
       */
      public static void main (final String[] args)
      throws Exception
      {
          if (args.length < 1) printUsage();

          boolean head = false;
          boolean verbose = false;

          int index = 0;

          String downloadDir = ".";
          String downloadList = null;

          while (index < args.length && args[index].startsWith("-"))
          {
              final String option = args[index];

              if (option.equals("-d") || option.equals("-l"))
              {
                  // Both options need a value after them.
                  if (args.length - index < 2) printUsage();

                  if (option.equals("-d"))
                      downloadDir = args[index+1];
                  else
                      downloadList = args[index+1];

                  index += 2;
              }
              else if (option.equals("-v"))
              {
                  verbose = true;
                  index++;
              }
              else if (option.equals("-H"))
              {
                  head = true;
                  index++;
              }
              else
              {
                  // '-h' and any unknown option: without leaving here, the
                  // loop would look at the same argument forever.
                  printUsage();
              }
          }

          if (downloadList != null)
          {
              System.out.println("...downloadList is at "+downloadList);
              downloadList(downloadList, head, verbose);
              System.exit(0);
          }

          //
          // do download

          for (int i=index; i<args.length; i++)
          {
              wget(downloadDir, args[i], head, verbose);
              System.out.println("...got "+args[i]);
          }
      }
  }

 

헌데!! 조금 더 알아본 결과...

http://lenya.apache.org/apidocs/1.2/org/apache/lenya/net/WGet.html#downloadUsingHttpClient(java.net.URL,%20java.lang.String,%20java.lang.String)

Class WGet

java.lang.Object
  extended by org.apache.lenya.net.WGet

이미 WGet 라이브러리가 있었던 것이다~

이걸 이용하면 조금 더 짧게 구현할 수 있긴 하겠는데.....

그냥 위 소스를 사용해야겠다...ㅋ;;

이 글은 스프링노트에서 작성되었습니다.

신고
크리에이티브 커먼즈 라이선스
Creative Commons License

'Programming' 카테고리의 다른 글

Quartz properties  (0) 2008.05.27
SpringFramework + Quartz  (0) 2008.04.28
wget.java  (0) 2008.03.17
simple java wget  (0) 2008.03.17


티스토리 툴바