2023年11月28日发(作者:)
Java 抓取网页内容的三种方式
2011-12-05 11:23
一、
import .*;
import .*;
public class GetURL {
public static void main(String[] args) {
InputStream in = null;
OutputStream out = null;
try {
// 检查命令行参数
if (( != 1)&& ( != 2))
throw new IllegalArgumentException("Wrong number of args");
URL url = new URL(args[0]); //创建 URL
in = ream(); // 打开到这个URL的流
if ( == 2) // 创建一个适当的输出流
out = new FileOutputStream(args[1]);
else out = ;
<%@ page import=".*" contentType="text/html;charset=gb2312" %>
<%@ page language="java" import=".*"%>
<%
String htmpath=null;
BufferedReader in = null;
InputStreamReader isr = null;
InputStream is = null;
PrintWriter pw=null;
HttpURLConnection huc = null;
import java.io.*;
import java.net.*;
/**
 * Command-line entry point that validates its arguments, selects an output
 * destination and parses the URL given as the first argument.
 *
 * <p>Usage: {@code java HttpClient <URL> [<filename>]}
 *
 * <p>NOTE(review): this listing appears to be an excerpt. Only argument
 * checking, output selection and URL parsing are present; the code that would
 * actually fetch the page and write it to {@code to_file} is not part of the
 * visible listing, so {@code to_file} and {@code protocol} are currently
 * assigned but never used.
 */
public class HttpClient {
    /**
     * Checks the arguments, picks the output stream and parses the URL.
     *
     * @param args {@code args[0]} is the URL to parse; the optional
     *             {@code args[1]} is the name of a file to open for output.
     *             Any other argument count is reported as an error.
     */
    public static void main(String[] args) {
        try {
            // Check the command-line arguments: exactly one or two are accepted.
            if ((args.length != 1) && (args.length != 2)) {
                throw new IllegalArgumentException("Wrong number of args");
            }

            OutputStream to_file;
            if (args.length == 2) {
                // Output goes to the named file.
                to_file = new FileOutputStream(args[1]);
            } else {
                // Output goes to the console.
                to_file = System.out;
            }

            // Parse the user-supplied URL; a malformed URL throws and is
            // reported by the catch block below.
            URL url = new URL(args[0]);
            String protocol = url.getProtocol();
        } catch (Exception e) {
            // Report the failure followed by a usage hint; nothing is rethrown.
            System.err.println(e);
            System.err.println("Usage: java HttpClient <URL> [<filename>]");
        }
    }
}
运行方法:C:\java> java HttpClient http://127.0.0.1:8080/kj/
注意:中文可能会显示乱码。在得到源码后,应该做相应的转码工作,例如:
public static String GetURLstr(String strUrl)
{
InputStream in = null;
OutputStream out = null;
String strdata = "";
try
n("Usage: java GetURL


发布评论