java获取网页源码
4728 点击·0 回帖
![]() | ![]() | |
package gogo.cool;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Small demo that downloads a web page over HTTP and prints its HTML source.
 *
 * @author neo600
 */
public class test1 {

    /** Maximum time to wait for the TCP connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;

    /** Maximum time to wait for data while reading the response, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 30000;

    /**
     * Fetches a fixed page and prints its source to standard output.
     *
     * @param a command-line arguments (unused)
     * @throws IOException declared for compatibility; {@link #getHTML} handles its own I/O errors
     */
    public static void main(String[] a) throws IOException {
        String url = "http://www.baidu.com";

        // Decoding with the gb2312 charset declared in the page itself
        // actually produces garbled text, so "gbk" is passed instead.
        System.out.println(getHTML(url, "gbk"));
    }

    /**
     * Downloads the page at {@code pageURL} and returns its body as text.
     *
     * <p>The response is read line by line and every line is terminated with
     * {@code "\r\n"} in the result, regardless of the line terminator the
     * server used.
     *
     * <p>This method is best-effort and never throws: on any failure (bad URL,
     * non-HTTP URL, unsupported encoding, network error, timeout) the stack
     * trace is printed to standard error and whatever was read up to that
     * point (possibly the empty string) is returned.
     *
     * @param pageURL  absolute HTTP(S) URL of the page to fetch
     * @param encoding name of the charset used to decode the response bytes
     * @return the decoded page source, or the part read before a failure
     */
    public static String getHTML(String pageURL, String encoding) {
        StringBuilder pageHTML = new StringBuilder();
        HttpURLConnection connection = null;

        try {
            URL url = new URL(pageURL);
            connection = (HttpURLConnection) url.openConnection();

            // Without explicit timeouts the defaults are infinite, so a
            // stalled server would block the caller forever.
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            connection.setReadTimeout(READ_TIMEOUT_MILLIS);

            // Presumably sent so the server answers as it would to a browser.
            connection.setRequestProperty("User-Agent", "MSIE 7.0");

            // try-with-resources closes the reader (and the underlying
            // stream) even when reading fails part-way through.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), encoding))) {
                String line;
                while ((line = br.readLine()) != null) {
                    pageHTML.append(line).append("\r\n");
                }
            }
        } catch (IOException | RuntimeException e) {
            // Deliberately best-effort: report the problem and fall through
            // to return what has been collected so far.
            e.printStackTrace();
        } finally {
            // Release the connection on both the success and failure paths.
            if (connection != null) {
                connection.disconnect();
            }
        }

        return pageHTML.toString();
    }
}
![]() | ![]() |