2023年11月28日发(作者:)

Java 抓取网页内容的三种方式

2011-12-05 11:23

一、

import .*;

import .*;

public class GetURL {

public static void main(String[] args) {

InputStream in = null;

OutputStream out = null;

try {

// 检查命令行参数

if (( != 1)&& ( != 2))

throw new IllegalArgumentException("Wrong number of args");

URL url = new URL(args[0]); //创建 URL

in = ream(); // 打开到这个URL的流

if ( == 2) // 创建一个适当的输出流

out = new FileOutputStream(args[1]);

else out = ;

<%@ page import=".*" contentType="text/html;charset=gb2312" %>

<%@ page language="java" import=".*"%>

<%

String htmpath=null;

BufferedReader in = null;

InputStreamReader isr = null;

InputStream is = null;

PrintWriter pw=null;

HttpURLConnection huc = null;

import java.io.*;

import java.net.*;

/**
 * Command-line example that validates its arguments, selects an output
 * destination and parses the target URL.
 *
 * <p>Usage: {@code java HttpClient <URL> [<filename>]}
 *
 * <p>NOTE(review): in this excerpt neither {@code protocol} nor the selected
 * output stream is used after being set; the request/response part of the
 * listing appears to have been elided from the article.
 */
public class HttpClient {

    /**
     * Entry point.
     *
     * <p>Any exception raised while processing the arguments is printed to
     * standard error followed by a usage line; nothing is rethrown.
     *
     * @param args {@code args[0]} is the URL to parse; the optional
     *             {@code args[1]} is the name of a file to open for output.
     *             When it is absent, standard output is selected instead.
     */
    public static void main(String[] args) {
        OutputStream to_file = null;

        try {
            // Check the command-line arguments
            if ((args.length != 1) && (args.length != 2)) {
                throw new IllegalArgumentException("Wrong number of args");
            }

            if (args.length == 2) {
                to_file = new FileOutputStream(args[1]); // write to a file
            } else {
                to_file = System.out; // write to the console
            }

            URL url = new URL(args[0]);
            String protocol = url.getProtocol();
        } catch (Exception e) {
            System.err.println(e);
            System.err.println("Usage: java HttpClient <URL> [<filename>]");
        } finally {
            // Release the file handle if one was opened. System.out is left
            // open because it is shared with the rest of the JVM.
            if (to_file != null && to_file != System.out) {
                try {
                    to_file.close();
                } catch (IOException ignored) {
                    // Best-effort close on exit; nothing useful can be done here.
                }
            }
        }
    }
}

运行方法:C:\java>java HttpClient http://127.0.0.1:8080/kj/

注意中文可能会显示乱码,在得到源码后,应该做相应的转码工作,例如:

public static String GetURLstr(String strUrl)

{

InputStream in = null;

OutputStream out = null;

String strdata = "";

try

n("Usage: java GetURL []");