您的位置 首页 java

java爬虫获取网页源码2种方式(纯净版)

第一种:URL(HttpURLConnection)

 package InternetTest;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
/**
 * Downloads the raw source of a web page with {@link HttpURLConnection} and
 * prints it to standard output.
 *
 * <p>Usage: {@code java InternetTest.a44 [url]}. When no argument is given,
 * {@link #DEFAULT_URL} is fetched.
 */
public class a44 {
    /**
     * Fallback address used when no URL is passed on the command line.
     * NOTE(review): the address in the original listing was lost (only the
     * residue {@code #34;} of an escaped quote remained), so this is a
     * placeholder - replace it with the page you actually want to fetch.
     */
    private static final String DEFAULT_URL = "https://www.example.com/";

    /**
     * Fetches the page and prints its body.
     *
     * @param args optional; {@code args[0]} is the URL to fetch
     * @throws Exception if the URL is malformed or any network/IO step fails
     */
    public static void main(String[] args) throws Exception {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        URL url = new URL(address);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            // Connect timeout in milliseconds (5120 ms, kept from the original listing).
            conn.setConnectTimeout(5 * 1024);

            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            // try-with-resources closes the stream even if a read fails midway.
            try (InputStream inStream = conn.getInputStream()) {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = inStream.read(buffer)) != -1) {
                    outStream.write(buffer, 0, len);
                }
            }

            // Decode with an explicit charset instead of the platform default so the
            // output does not depend on the machine the program runs on.
            // Assumes the page is UTF-8 encoded - TODO confirm for the target site.
            String htmlSource = new String(outStream.toByteArray(), StandardCharsets.UTF_8);
            System.out.println(htmlSource);
        } finally {
            conn.disconnect();
        }
    }
}

第二种: HttpClient

 package InternetTest;
import org. apache .http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
public class a45 {
    public static void main(String[] args) throws Exception{
        String url1 = "#34;;
        CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
        CloseableHttpResponse closeableHttpResponse = null;
        HttpGet request = new HttpGet(url1);
        closeableHttpResponse = closeableHttpClient.execute(request);
        if(closeableHttpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
            HttpEntity httpEntity = closeableHttpResponse.getEntity();
            String html = EntityUtils.toString(httpEntity, " utf -8");
            System.out.println(html);
        } else {
            System.out.println(EntityUtils.toString(closeableHttpResponse.getEntity(), "utf-8"));
        }
        HttpClientUtils.closeQuietly(closeableHttpResponse);
        HttpClientUtils.closeQuietly(closeableHttpClient);
    }
}
  

文章来源:智云一二三科技

文章标题:java爬虫获取网页源码2种方式(纯净版)

文章地址:https://www.zhihuclub.com/195379.shtml

关于作者: 智云科技

热门文章

网站地图