Csharp WebRequest實作POST來取得網頁內容(及檔案下載)

CSharp要取得網頁內容可以利用內建的WebBrowser來取用IE Container進行連線與資料的取得,WebBrowser實作了很多內容,像是Javascript、HTML parse等等,功能十分強大。

這個範例不使用WebBrowser,而是使用HttpWebRequest來取得遠端的html內容。

Web Server說穿了也就是一個Socket Server,只是Browser在與其溝通時使用了規定的規則而已,以下範例會模擬此規則傳送header來告知Web Server,假裝自己是Browser,而此也會利用cookieContainer來記錄傳輸過程的Session情況,如此登入後的Session就可以在往後的需求連線中被使用。

程式說明:

Server端




  1. 取得post的參數test,並在Console及html內容裡輸出 。
  2. 當Session裡不存在save這個屬性時,設定此屬性,並在Console及html內容裡輸出Save Session訊息。
  3. 當Session的屬性save存在時,代表Session資訊是可被取得的,則會直接把內容輸出在Console及本身的html內容裡。

程式連續二次對Server進行要求,第一次會記錄Session的save屬性,而第二次就會直接輸出save資料內容。

WebRequest jsp輸出內容

程式碼:(JSP)

<%@ page language="java" contentType="text/html; charset=utf-8"
pageEncoding="utf-8"%><%request.setCharacterEncoding("UTF-8"); %>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html> 
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>測試</title>
</head>
<body>
<%!
// Replaces the five HTML-significant characters with entities so that
// client-supplied text is rendered as text instead of being parsed as markup.
private static String escapeHtml(String text)
{
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++)
    {
        char c = text.charAt(i);
        switch (c)
        {
            case '&': escaped.append("&amp;"); break;
            case '<': escaped.append("&lt;"); break;
            case '>': escaped.append("&gt;"); break;
            case '"': escaped.append("&quot;"); break;
            case '\'': escaped.append("&#39;"); break;
            default: escaped.append(c);
        }
    }
    return escaped.toString();
}
%>
<% 
// Echo the posted "test" parameter to the server console and into the page.
String data = request.getParameter("test");
System.out.println(data);
// The parameter comes straight from the client, so it is escaped before being
// written into the HTML. A missing parameter still prints the literal "null".
out.print((data == null ? "null" : escapeHtml(data)) + "<br/>");
if (session.getAttribute("save") == null)
{
    // First request of this session: store the marker attribute.
    session.setAttribute("save", "登入資訊");
    System.out.println("Save Session!");
    out.print("Save Session!" + "<br/>");
}
else
{
    // The attribute is already present, which shows the client sent the session
    // cookie back; print the stored value.
    System.out.println(session.getAttribute("save"));
    out.print(session.getAttribute("save") + "<br/>");
}
%>
</body>
</html>

客戶端

利用WebRequest連續二次對Web Server做要求,並把回傳的內容輸出到Console裡。

上方紅色框內是第一次請求時回傳的內容test=123,而下方藍綠色框內則是第二次請求的內容test=456,可以看到Session的使用是有作用的。

WebRequest回傳內容

在使用WebRequest時,需先設定header及cookie等屬性,再把要post出去的資料write out,之後才進行read in的動作。

程式碼:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using log4net;
using log4net.Config;
using System.Net;
using System.IO;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Json;
namespace yslifes
{
/// <summary>
/// Sends browser-like HTTP POST requests with <see cref="HttpWebRequest"/> and keeps the
/// body of the last successful response. All instances share one cookie container, so a
/// session established by one request is sent with the following ones.
/// </summary>
class WebModule
{
    private static readonly ILog logger = LogManager.GetLogger(typeof(WebModule));

    // Body of the most recent successful response.
    private StringBuilder buff;

    // Session/cookie store shared by every request. It is created exactly once;
    // re-creating it in the instance constructor would drop the session each time
    // another WebModule is constructed.
    private static readonly CookieContainer cookie = new CookieContainer();

    public WebModule()
    {
        buff = new StringBuilder();
    }

    /// <summary>Returns the body of the most recent successful response ("" if none).</summary>
    public string getContent()
    {
        return buff.ToString();
    }

    /// <summary>
    /// POSTs <paramref name="data"/> as application/x-www-form-urlencoded to
    /// <paramref name="sUrl"/> and stores the response body for <see cref="getContent"/>.
    /// </summary>
    /// <param name="sUrl">Absolute URL to post to.</param>
    /// <param name="data">Already URL-encoded form body; null is treated as empty.</param>
    /// <param name="referer">Value for the Referer header, or null to omit it.</param>
    /// <returns>
    /// true when a response was received and read; false when any exception occurred
    /// (the exception is logged and the previously stored content is left untouched).
    /// </returns>
    public bool doPost(string sUrl, string data, string referer)
    {
        bool dosuccess = false;
        try
        {
            HttpWebRequest URLConn = (HttpWebRequest)WebRequest.Create(sUrl);
            // Maximum time to wait for the connection/response, in milliseconds.
            URLConn.Timeout = 10000;
            URLConn.Method = "POST";
            // Headers that imitate a desktop browser.
            URLConn.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; zh-TW; rv:1.9.1.2) "
                + "Gecko/20090729 Firefox/3.5.2 GTB5 (.NET CLR 3.5.30729)";
            URLConn.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            URLConn.Headers.Set("Accept-Language", "zh-tw,en-us;q=0.7,en;q=0.3");
            URLConn.Headers.Set("Accept-Charset", "Big5,utf-8;q=0.7,*;q=0.7");
            if (referer != null)
            {
                URLConn.Referer = referer;
            }
            // Plain form fields use application/x-www-form-urlencoded;
            // uploading files would require multipart/form-data instead.
            URLConn.ContentType = "application/x-www-form-urlencoded";
            // Follow HTTP redirects automatically.
            URLConn.AllowAutoRedirect = true;

            if (data == null)
            {
                data = "";
            }
            logger.Debug(data);
            byte[] bytes = Encoding.UTF8.GetBytes(data);
            URLConn.ContentLength = bytes.Length;
            // Attach the shared cookies so the server-side session is reused.
            URLConn.CookieContainer = cookie;

            if (data.Length > 0)
            {
                // The request stream is closed here, before the response is requested.
                using (Stream oStreamOut = URLConn.GetRequestStream())
                {
                    oStreamOut.Write(bytes, 0, bytes.Length);
                }
            }

            // Disposing the response and reader releases the underlying connection.
            using (WebResponse response = URLConn.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                string html = reader.ReadToEnd();
                buff.Clear();
                buff.Append(html);
            }
            dosuccess = true;
        }
        catch (Exception ex)
        {
            // Log the whole exception (type, message and stack), not only the stack trace.
            logger.Error("doPost failed for " + sUrl, ex);
        }
        return dosuccess;
    }
}
/// <summary>
/// Console demo: configures log4net, posts two form bodies to the sample JSP page
/// through one WebModule and prints each response.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Load the log4net settings from log4net.config (relative path).
        FileInfo configFile = new FileInfo("log4net.config");
        XmlConfigurator.Configure(configFile);
        ILog logger = LogManager.GetLogger(typeof(Program));

        WebModule module = new WebModule();
        string[] payloads = { "test=123", "test=456" };
        foreach (string payload in payloads)
        {
            // Both requests go through the same module; each response is printed.
            module.doPost("http://localhost:8080/test/MyData.jsp", payload, null);
            Console.WriteLine("解答:" + module.getContent());
        }

        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}
}

log4net的使用可以參考Log4net Visual Studio版的log4j

Java版可以參考HttpURLConnection來實作get及post動作

增加可下載檔案及可取得經gzip或deflate壓縮後的網頁內容

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;
namespace yslifes
{
/// <summary>
/// Sends browser-like HTTP POST requests with <see cref="HttpWebRequest"/>. It can return the
/// response body as text (<see cref="doPost"/>) or save it to a file
/// (<see cref="DownloadFile"/>), transparently decoding gzip/deflate responses.
/// All instances share one cookie container so a session survives across requests.
/// </summary>
class WebModule
{
    // Not referenced anywhere in this class as shown; kept unchanged.
    private static Dictionary<string, string> p = new Dictionary<string, string>();

    // NOTE(review): never assigned in this class as shown, so isLogin() always reports false.
    private static Boolean islogin = false;

    // Body of the most recent successful doPost response.
    private StringBuilder buff;

    // NOTE(review): success and msg are never assigned in this class as shown, so
    // isSuccess() reports false and replyMsg() reports "". Their intended meaning is
    // not visible here - confirm before relying on them.
    private bool success = false;
    private string msg = "";

    // Session/cookie store shared by every request made through this class.
    private static CookieContainer cookie = new CookieContainer();

    // Progress counters of the most recent DownloadFile call.
    private decimal bytesProcessed = 0;
    private decimal bytesTotal = 0;

    public WebModule()
    {
        buff = new StringBuilder();
    }

    /// <summary>Returns the <c>success</c> field (see the review note on the field).</summary>
    public bool isSuccess()
    {
        return success;
    }

    /// <summary>Returns the <c>msg</c> field (see the review note on the field).</summary>
    public string replyMsg()
    {
        return msg;
    }

    /// <summary>Returns the static <c>islogin</c> flag (see the review note on the field).</summary>
    public static bool isLogin()
    {
        return islogin;
    }

    /// <summary>Returns the body of the most recent successful doPost response ("" if none).</summary>
    public string getContent()
    {
        return buff.ToString();
    }

    /// <summary>
    /// Total size of the last download as declared by the server's Content-Length
    /// (0 when the server did not declare one). For a compressed response this is the
    /// compressed size, while <see cref="ByteProcess"/> counts decoded bytes.
    /// </summary>
    public decimal getByteTotal()
    {
        return bytesTotal;
    }

    /// <summary>Number of bytes written to the local file so far by the last download.</summary>
    public decimal ByteProcess()
    {
        return bytesProcessed;
    }

    /// <summary>
    /// POSTs <paramref name="data"/> as application/x-www-form-urlencoded to
    /// <paramref name="sUrl"/> and stores the (decoded) response body for
    /// <see cref="getContent"/>. The stored content is also echoed to the console.
    /// </summary>
    /// <param name="sUrl">Absolute URL to post to.</param>
    /// <param name="data">Already URL-encoded form body; null is treated as empty.</param>
    /// <param name="referer">Value for the Referer header, or null to omit it.</param>
    /// <returns>
    /// true when a response was received and read completely; false when any exception
    /// occurred (it is written to the console and the stored content is left untouched).
    /// </returns>
    public bool doPost(string sUrl, string data, string referer)
    {
        bool dosuccess = false;
        try
        {
            HttpWebRequest URLConn = CreatePostRequest(sUrl, data, referer);
            // Disposing the response, stream and reader releases the connection.
            using (WebResponse response = URLConn.GetResponse())
            using (Stream body = OpenDecodedStream(response))
            using (StreamReader reader = new StreamReader(body))
            {
                // The decoded text is what gets stored, for compressed and
                // uncompressed responses alike.
                string html = reader.ReadToEnd();
                buff.Clear();
                buff.Append(html);
            }
            dosuccess = true;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
        Console.WriteLine(buff);
        return dosuccess;
    }

    /// <summary>
    /// POSTs <paramref name="data"/> to <paramref name="sUrl"/> and writes the (decoded)
    /// response body to <paramref name="localFilename"/>, updating the progress counters
    /// exposed by <see cref="getByteTotal"/> and <see cref="ByteProcess"/> as it goes.
    /// </summary>
    /// <param name="sUrl">Absolute URL to post to.</param>
    /// <param name="data">Already URL-encoded form body; null is treated as empty.</param>
    /// <param name="referer">Value for the Referer header, or null to omit it.</param>
    /// <param name="localFilename">Path of the file to create or overwrite.</param>
    /// <returns>
    /// Number of bytes written to the file. Exceptions are written to the console and
    /// the count reached so far is returned.
    /// </returns>
    public decimal DownloadFile(string sUrl, string data, string referer, String localFilename)
    {
        bytesProcessed = 0;
        bytesTotal = 0;
        try
        {
            HttpWebRequest URLConn = CreatePostRequest(sUrl, data, referer);
            using (WebResponse response = URLConn.GetResponse())
            {
                // ContentLength is a long (no overflow for files over 2 GB) and is
                // negative when the server did not declare a length.
                long declaredLength = response.ContentLength;
                bytesTotal = declaredLength > 0 ? declaredLength : 0;

                // Decode gzip/deflate bodies so the saved file holds the real content
                // rather than the compressed transfer bytes.
                using (Stream remoteStream = OpenDecodedStream(response))
                using (Stream localStream = File.Create(localFilename))
                {
                    byte[] buffer = new byte[1024];
                    int bytesRead;
                    while ((bytesRead = remoteStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        localStream.Write(buffer, 0, bytesRead);
                        bytesProcessed += bytesRead;
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
        return bytesProcessed;
    }

    // Builds the browser-like POST request used by doPost and DownloadFile, attaches the
    // shared cookies and, when there is a body, sends it and closes the request stream.
    private static HttpWebRequest CreatePostRequest(string sUrl, string data, string referer)
    {
        Console.WriteLine(sUrl);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(sUrl);
        // Maximum time to wait for the connection/response, in milliseconds.
        request.Timeout = 60000;
        request.Method = "POST";
        // Headers that imitate a desktop browser.
        request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; zh-TW; rv:1.9.1.2) "
            + "Gecko/20090729 Firefox/3.5.2 GTB5 (.NET CLR 3.5.30729)";
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        request.Headers.Set("Accept-Language", "zh-tw,en-us;q=0.7,en;q=0.3");
        request.Headers.Set("Accept-Charset", "Big5,utf-8;q=0.7,*;q=0.7");
        request.Headers["Accept-Encoding"] = "gzip, deflate";
        if (referer != null)
        {
            request.Referer = referer;
        }
        // Plain form fields use application/x-www-form-urlencoded;
        // uploading files would require multipart/form-data instead.
        request.ContentType = "application/x-www-form-urlencoded";
        // Follow HTTP redirects automatically.
        request.AllowAutoRedirect = true;

        if (data == null)
        {
            data = "";
        }
        Console.WriteLine(data);
        byte[] bytes = Encoding.UTF8.GetBytes(data);
        request.ContentLength = bytes.Length;
        request.CookieContainer = cookie;

        if (data.Length > 0)
        {
            using (Stream requestBody = request.GetRequestStream())
            {
                requestBody.Write(bytes, 0, bytes.Length);
            }
        }
        return request;
    }

    // Returns the response body stream, wrapped in the decompressor named by the
    // Content-Encoding header (gzip or deflate). Any other or missing encoding yields
    // the raw stream. Disposing the returned stream also disposes the raw one.
    private static Stream OpenDecodedStream(WebResponse response)
    {
        Stream raw = response.GetResponseStream();
        string contentEncoding = response.Headers["Content-Encoding"];
        if (!string.IsNullOrEmpty(contentEncoding))
        {
            // Ordinal, case-insensitive match: the header value is protocol text,
            // so culture-specific casing rules must not apply.
            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return new GZipStream(raw, CompressionMode.Decompress);
            }
            if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return new DeflateStream(raw, CompressionMode.Decompress);
            }
        }
        return raw;
    }
}
}

2 thoughts to “Csharp WebRequest實作POST來取得網頁內容(及檔案下載)”

發表迴響