import java.io.BufferedInputStream;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class BatchDownload {
public static void main(String[] args) {
//百度图片
String fromUrl = "http://image.baidu.com/i?tn=baiduimage&ct=201326592&cl=2&lm=-1&fr=&fmq=&pv=&ic=0&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&word=%B3%B5&s=0";
StringBuffer pageCo
ntents = new StringBuffer();
//System.out.println("asdasdas");
try{
URL startUrl = new URL(fromUrl);
pageCo
ntents = downloadPage(startUrl);
//System.out.println(pageContents);
// if(pageContents.length()==0) System.out.println("asdasdas");
List<String> imgUrls = getImageUrls(pageContents);
//System.out.println(imgUrls.size());
//downloadImages(pageContents);
//if(pageContents.length()==0) System.out.println("asdasdas");
//System.out.println(pageContents);
}
catch(Exception e)
{
}
}
/**
 * Downloads the resource at {@code httpUrl} and returns its text, decoded as GBK.
 *
 * <p>Each line read is followed by a single {@code '\n'} in the result, so that
 * markup spanning several lines is not fused together (for example {@code <img}
 * followed by {@code src=} on the next line).
 *
 * @param httpUrl the URL to fetch
 * @return the decoded page text
 * @throws MalformedURLException declared for callers; not thrown directly here
 * @throws IOException if the connection cannot be opened or reading fails
 */
public static StringBuffer downloadPage(URL httpUrl) throws MalformedURLException, IOException {
    StringBuffer data = new StringBuffer();
    // Open the input stream.
    InputStream in = getInputStream(httpUrl);
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "GBK"));
        // Read the data line by line.
        String currentLine;
        while ((currentLine = reader.readLine()) != null) {
            data.append(currentLine).append('\n');
        }
    } finally {
        // Closing the underlying stream releases the connection even when
        // decoding or reading fails part-way through.
        in.close();
    }
    return data;
}
/**
 * Downloads the resource at {@code httpUrl} and writes it to {@code fileSavePath},
 * replacing any existing file at that path.
 *
 * <p>The remote stream is opened before the local file is created, so a bad URL
 * or a failed connection does not leave an empty file behind.
 *
 * @param httpUrl      address of the resource to download
 * @param fileSavePath path of the local file to write
 * @return the file that was written
 * @throws MalformedURLException if {@code httpUrl} is not a valid URL
 * @throws IOException if the connection, the read, or the write fails
 */
public static File downloadFile(String httpUrl, String fileSavePath) throws MalformedURLException, IOException {
    File file = new File(fileSavePath);
    URL url = new URL(httpUrl);
    // Open the input stream.
    InputStream in = new BufferedInputStream(getInputStream(url));
    try {
        // Open the output stream; this creates the file if it does not exist.
        FileOutputStream out = new FileOutputStream(file);
        try {
            // Copy in chunks; only the bytes actually read are written.
            byte[] buff = new byte[8192];
            int count;
            while ((count = in.read(buff)) != -1) {
                out.write(buff, 0, count);
            }
            out.flush();
        } finally {
            out.close();
        }
    } finally {
        in.close();
    }
    return file;
}
private static void downloadImages(StringBuffer pageContents)throws MalformedURLException, IOException
{
// 获取html页面
StringBuffer page = pageContents;
// 获取页面中的地址
List<String> imgUrls = getImageUrls(page);
// 保存图片,返回文件列表
List<File> fileList = new ArrayList<File>();
String imgSaveDir="E:";
int i = 1;
for (String url : imgUrls)
{
String fileName = url.substrin
g(url.lastIndexOf("/") + 1);
File file = downloadFile(url, imgSaveDir + "\" + fileName);
System.out.println(file.getPath()+ " 下载完成!");
fileList.add(file);
i++;
}
}
private static InputStream getInputStream(URL httpUrl) throws IOException
{
// 网页Url
URL url = httpUrl;
URLCo
nnection uc = url.openCo
nnection();
uc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
return uc.getInputStream();
}
public static List<String> getImageUrls(StringBuffer html)
{
List<String> result = new ArrayList<String>();
// 将字符串解析为html文档
docu
ment doc = Jsoup.parse(html.toString());
// 获取img标签
Elements es =doc.getElementsByTag("img");
//Element es = doc.getElementById("s
cript");
//Elements ss = new Elements();
//ss.add(es);
//System.out.println(es.size());
//doc.getElementsByTag("img");
// 获取每一个img标签src的内容,也就是图片地址
for (Iterator<Element> i = es.iterator(); i.hasNext();)
{
Element e = i.next();
String r = e.attr("src");
Pattern p = Pattern.compile("http://.+\.(jpg|jpeg)");
Matcher m = p.matcher(r);
if (m.matches())
{
result.add(r);
}
}
return result;
}