wget.java

Programming 2008.03.17 13:21

org.apache.lenya.net
Class WGet

java.lang.Object
  extended by org.apache.lenya.net.WGet

public class WGet
extends java.lang.Object

Similar to the UNIX wget


Constructor Summary
WGet()
Creates a new WGet object.

Method Summary
java.lang.String createFileName(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
byte[] download(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
byte[] downloadUsingHttpClient(java.net.URL url, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
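Downloads the resource, saves it below the directory prefix, and then downloads the images and links it references.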
java.lang.String escapeSlashes(java.lang.String string)
Escape slashes
java.util.List getLinks(java.net.URL url)
byte[] getResource(java.net.URL url)
static void main(java.lang.String[] args)
Command-line entry point: downloads the URL given on the command line.
byte[] runProcess(java.lang.String command)
void saveToFile(java.lang.String filename, byte[] bytes)
void setDirectoryPrefix(java.lang.String directory_prefix)
-P
void substitutePrefix(java.lang.String filename, java.lang.String prefixSubstitute, java.lang.String substituteReplacement)
Substitute prefix, e.g.
java.lang.String toString()
Returns the configured directory prefix in the form "-P: directory".

Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait

Constructor Detail

WGet

public WGet()
Creates a new WGet object.
Method Detail

main

public static void main(java.lang.String[] args)
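Command-line entry point. Prints a usage message when called without arguments; otherwise downloads the given URL, saving files below the directory given with -P (default: the current working directory).
Parameters:
args - the URL to download, optionally followed by -P<directory> (e.g. -P/home/lenya/download)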

setDirectoryPrefix

public void setDirectoryPrefix(java.lang.String directory_prefix)
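Sets the directory below which downloaded files are saved (the -P command-line option).
Parameters:
directory_prefix - directory below which downloaded files are saved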

download

public byte[] download(java.net.URL url,
                       java.lang.String prefixSubstitute,
                       java.lang.String substituteReplacement)
                throws java.io.IOException
Parameters:
url - The url of the resource to download
prefixSubstitute - Regexp which shall be replaced
substituteReplacement - Replacement of the regexp
Returns:
bytes of downloaded resource
Throws:
java.io.IOException - URL might not exist

downloadUsingHttpClient

public byte[] downloadUsingHttpClient(java.net.URL url,
                                      java.lang.String prefixSubstitute,
                                      java.lang.String substituteReplacement)
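Downloads the resource at the given URL, saves it below the directory prefix, and then downloads the images and links it references. Errors are logged rather than thrown.
Parameters:
url - The url of the resource to download
prefixSubstitute - Regexp which shall be replaced
substituteReplacement - Replacement of the regexp
Returns:
bytes of downloaded resource, or null if the download failed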

getResource

public byte[] getResource(java.net.URL url)
                   throws java.io.IOException
Throws:
java.io.IOException

getLinks

public java.util.List getLinks(java.net.URL url)
                        throws java.io.IOException
Throws:
java.io.IOException

substitutePrefix

public void substitutePrefix(java.lang.String filename,
                             java.lang.String prefixSubstitute,
                             java.lang.String substituteReplacement)
                      throws java.io.IOException
Substitute prefix, e.g. "/lenya/blog/live/" by "/"
Parameters:
filename - Filename
prefixSubstitute - Prefix which shall be replaced
substituteReplacement - Prefix which is going to replace the original
Throws:
java.io.IOException - DOCUMENT ME!

escapeSlashes

public java.lang.String escapeSlashes(java.lang.String string)
Escape slashes
Returns:
String with escaped slashes

toString

public java.lang.String toString()
Returns a short description of this object's configuration.
Overrides:
toString in class java.lang.Object
Returns:
the string "-P: " followed by the directory prefix

saveToFile

public void saveToFile(java.lang.String filename,
                       byte[] bytes)
                throws java.io.FileNotFoundException,
                       java.io.IOException
Throws:
java.io.FileNotFoundException
java.io.IOException

createFileName

public java.lang.String createFileName(java.net.URL url,
                                       java.lang.String prefixSubstitute,
                                       java.lang.String substituteReplacement)
Parameters:
url - URL of resource, which has been downloaded and shall be saved
Returns:
Absolute substituted filename

runProcess

public byte[] runProcess(java.lang.String command)
                  throws java.lang.Exception
Throws:
java.lang.Exception

Copyright © 1999-2005 Apache Software Foundation. All Rights Reserved.

소스코드

  1. /*
    * Copyright 1999-2004 The Apache Software Foundation
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    * http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    *
    */
  2. /* $Id: WGet.java,v 1.32 2004/03/01 16:18:25 gregor Exp $ */
  3. package org.apache.lenya.net;
  4. import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.Iterator;
    import java.util.List;
  5. import org.apache.log4j.Category;

  6. /**
    * Similar to the UNIX wget
    */
    public class WGet {
    static Category log = Category.getInstance(WGet.class);
    String directory_prefix = null;
  7. /**
    * Creates a new WGet object.
    */
    public WGet() {
    directory_prefix = System.getProperty("user.dir");
    }
  8. /**
    * DOCUMENT ME!
    *
    * @param args DOCUMENT ME!
    */
    public static void main(String[] args) {
    if (args.length == 0) {
    System.out.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
  9. return;
    }
  10. try {
    WGet wget = new WGet();
  11. for (int i = 0; i < args.length; i++) {
    if (args[i].indexOf("-P") == 0) {
    wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download, 2: remove "-P"
    }
    }
  12. byte[] response = wget.download(new URL(args[0]), "s/\\/lenya\\/oscom//g", "");
    } catch (MalformedURLException e) {
    System.err.println(e);
    } catch (Exception e) {
    System.err.println(e);
    }
    }
  13. /**
    * -P
    *
    * @param directory_prefix DOCUMENT ME!
    */
    public void setDirectoryPrefix(String directory_prefix) {
    this.directory_prefix = directory_prefix;
    }
  14. /**
    * @param url The url of the resource to download
    * @param prefixSubstitute Regexp which shall be replaced
    * @param substituteReplacement Replacement of the regexp
    *
    * @return bytes of downloaded resource
    *
    * @throws IOException URL might not exist
    */
    public byte[] download(URL url, String prefixSubstitute, String substituteReplacement)
    throws IOException {
    log.debug(".download(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
  15. return downloadUsingHttpClient(url, prefixSubstitute, substituteReplacement);
    }
  16. /**
    * DOCUMENT ME!
    *
    * @param url DOCUMENT ME!
    * @param prefixSubstitute DOCUMENT ME!
    *
    * @return DOCUMENT ME!
    */
    public byte[] downloadUsingHttpClient(URL url, String prefixSubstitute,
    String substituteReplacement) {
    log.debug(".downloadUsingHttpClient(): " + url);
  17. byte[] sresponse = null;
  18. try {
    sresponse = getResource(url);
  19. File file = new File(createFileName(url, prefixSubstitute, substituteReplacement));
  20. saveToFile(file.getAbsolutePath(), sresponse);
  21. substitutePrefix(file.getAbsolutePath(), prefixSubstitute, substituteReplacement);
    } catch (MalformedURLException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    } catch (FileNotFoundException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    } catch (IOException e) {
    log.error(".downloadUsingHttpClient(): ", e);
    }
  22. List links = null;
  23. try {
    links = getLinks(url);
    } catch (IOException ioe) {
    log.error(".downloadUsingHttpClient(): ", ioe);
    }
  24. if (links != null) {
    Iterator iterator = links.iterator();
  25. while (iterator.hasNext()) {
    String link = (String) iterator.next();
  26. try {
    URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url.toString(),
    link));
  27. byte[] child_sresponse = getResource(child_url);
    saveToFile(createFileName(child_url, prefixSubstitute, substituteReplacement),
    child_sresponse);
    } catch (Exception e) {
    log.error(".downloadUsingHttpClient(): ", e);
    }
    }
    }
  28. return sresponse;
    }
  29. /**
    *
    */
    public byte[] getResource(URL url) throws IOException {
    log.debug(".getResource(): " + url);
  30. HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
    InputStream in = httpConnection.getInputStream();
    byte[] buffer = new byte[1024];
    int bytes_read;
    ByteArrayOutputStream bufferOut = new ByteArrayOutputStream();
  31. while ((bytes_read = in.read(buffer)) != -1) {
    bufferOut.write(buffer, 0, bytes_read);
    }
  32. byte[] sresponse = bufferOut.toByteArray();
    httpConnection.disconnect();
  33. return sresponse;
    }
  34. /**
    *
    */
    public List getLinks(URL url) throws IOException {
    log.debug(".getLinks(): Get links from " + url);
  35. List links = null;
  36. try {
    org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(url.toString());
    links = html.getImageSrcs(false);
    links.addAll(html.getLinkHRefs(false));
    } catch (Exception e) {
    log.error(".getLinks() Exception 423432: ", e);
    }
  37. if (links != null) {
    log.debug(".getLinks(): Number of links found: " + links.size());
    }
  38. return links;
    }
  39. /**
    * Substitute prefix, e.g. "/lenya/blog/live/" by "/"
    *
    * @param filename Filename
    * @param prefixSubstitute Prefix which shall be replaced
    * @param substituteReplacement Prefix which is going to replace the original
    *
    * @throws IOException DOCUMENT ME!
    */
    public void substitutePrefix(String filename, String prefixSubstitute, String substituteReplacement) throws IOException {
    log.debug("Replace " + prefixSubstitute + " by " + substituteReplacement);
  40. org.apache.lenya.util.SED.replaceAll(new File(filename), escapeSlashes(prefixSubstitute), escapeSlashes(substituteReplacement));
    }
  41. /**
    * Escape slashes
    *
    * @return String with escaped slashes
    */
    public String escapeSlashes(String string) {
    StringBuffer buffer = new StringBuffer("");
  42. for (int i = 0; i < string.length(); i++) {
    if (string.charAt(i) == '/') {
    buffer.append("\\/");
    } else {
    buffer.append(string.charAt(i));
    }
    }
  43. return buffer.toString();
    }
  44. /**
    * DOCUMENT ME!
    *
    * @return DOCUMENT ME!
    */
    public String toString() {
    return "-P: " + directory_prefix;
    }
  45. /**
    *
    */
    public void saveToFile(String filename, byte[] bytes)
    throws FileNotFoundException, IOException {
    File file = new File(filename);
    File parent = new File(file.getParent());
  46. if (!parent.exists()) {
    log.warn(".saveToFile(): Directory will be created: " + parent.getAbsolutePath());
    parent.mkdirs();
    }
  47. FileOutputStream out = new FileOutputStream(file.getAbsolutePath());
    out.write(bytes);
    out.close();
    }
  48. /**
    * @param url URL of resource, which has been downloaded and shall be saved
    * @return Absolute substituted filename
    */
    public String createFileName(URL url, String prefixSubstitute, String substituteReplacement) {
    File file = new File(directory_prefix + File.separator + url.getFile());
  49. return file.getAbsolutePath().replaceAll(prefixSubstitute, substituteReplacement);
    }
  50. /**
    *
    */
    public byte[] runProcess(String command) throws Exception {
    Process process = Runtime.getRuntime().exec(command);
  51. java.io.InputStream in = process.getInputStream();
    byte[] buffer = new byte[1024];
    int bytes_read = 0;
    java.io.ByteArrayOutputStream baout = new java.io.ByteArrayOutputStream();
  52. while ((bytes_read = in.read(buffer)) != -1) {
    baout.write(buffer, 0, bytes_read);
    }
  53. if (baout.toString().length() > 0) {
    log.debug(".runProcess(): %%%InputStream:START" + baout.toString() +
    "END:InputStream%%%");
    }
  54. java.io.InputStream in_e = process.getErrorStream();
    java.io.ByteArrayOutputStream baout_e = new java.io.ByteArrayOutputStream();
  55. while ((bytes_read = in_e.read(buffer)) != -1) {
    baout_e.write(buffer, 0, bytes_read);
    }
  56. if (baout_e.toString().length() > 0) {
    log.error(".runProcess(): ###ErrorStream:START" + baout_e.toString() +
    "END:ErrorStream###");
    }
  57. return baout.toByteArray();
    }
    }

이 글은 스프링노트에서 작성되었습니다.

신고
크리에이티브 커먼즈 라이선스
Creative Commons License

'Programming' 카테고리의 다른 글

Quartz properties  (0) 2008.05.27
SpringFramework + Quartz  (0) 2008.04.28
wget.java  (0) 2008.03.17
simple java wget  (0) 2008.03.17


티스토리 툴바