Working with HTTP_Request

I needed to make a bit of sample code for someone the other day, demonstrating how to log into a website and collect data programmatically from PHP. The HTTP_Request PEAR class works fairly well for this; HTTP_Request2 should be a big improvement when it’s finished, as the original is geared toward PHP 4’s abysmal object model.

#! /opt/local/bin/php
<?php
/*
Sample code for HTTP_Request
Logs into Facebook and grabs the first story on your home page
This code fetches 3 pages, all with little quirks so is a good example of usage
(a fourth request at the very end logs out again)
First page is the Facebook home page, just a normal GET request, with one cookie set
Second page is the login form, this is a POST and we also add on cookies that we got in the first request
We check the response headers to make sure that things were successful
Third page is the home page, which is another plain old GET request with new cookies added on, and the POST data removed
The response body is saved from the third page, and some stuff is parsed out of it; code like that relies on a page not changing its structure too often, so is kind of a hack
*/

//HTTP_Request raises strict errors because it's old code written for PHP 4,
//so error reporting is switched off; $err holds the previous reporting level
$err = error_reporting(0);

//setup
$username = 'facebook@example.com';  //facebook email address
$password = 'password';  //facebook password
require_once "HTTP/Request.php";
 
//one request object is created here and configured for the first page
$req = new HTTP_Request();

//destination and verb: a plain GET of the Facebook front page
$req->setURL('http://www.facebook.com/');
$req->setMethod(HTTP_REQUEST_METHOD_GET);

//send a browser-like User-Agent header
$req->addHeader('User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.4) Gecko/2008102920 Firefox/3.0.4');

//special FB cookie: the current Unix time with '000' appended
$req->addCookie('cavalry_transit_start_time', time() . '000');

//fire the request; false means the response body is not kept, only status and headers matter here
$indexResult = $req->sendRequest(false);
if (PEAR::isError($indexResult)) {
	die("Error connecting to Facebook\n\n");
}

//anything other than a 200: dump the request, the status code and the response headers, then stop
if ($req->getResponseCode() != '200') {
	echo $req->_buildRequest();
	echo "\n\n";
	echo $req->getResponseCode();
	echo "\n\n";
	print_r($req->getResponseHeader());
	die;
}

echo "\n\nGot Facebook index page\n\n";
 
//grab the cookies the previous response set and stick them back on the request
$cookies = $req->getResponseCookies();
foreach($cookies as $cookie) {
	$req->addCookie($cookie['name'], $cookie['value']);
}

//special FB cookie
$req->addCookie('cavalry_transit_start_time', time() . '000');

//now we're going to post the login form; reuse the same object
$req->setURL('https://login.facebook.com/login.php?login_attempt=1');
$req->setMethod(HTTP_REQUEST_METHOD_POST);

//pretend we're coming from the previous page
$req->addHeader('Referer', 'http://www.facebook.com/');

//these are the form fields that are passed, some are hidden so we need to check the HTML carefully
//this one's already URL encoded, so set the third parameter to true
$req->addPostData('charset_test', '%E2%82%AC%2C%C2%B4%2C%E2%82%AC%2C%C2%B4%2C%E6%B0%B4%2C%D0%94%2C%D0%84', true);
$req->addPostData('locale', 'en_US');
$req->addPostData('email', $username);
$req->addPostData('pass', $password);
$req->addPostData('pass_placeholder', 'Password');

//send it and check for errors at the same time; passing false says not to save the
//response body, which we don't need this time (only the status code and headers are checked)
if (PEAR::isError($req->sendRequest(false))) {
	die("\n\nError logging in to Facebook\n\n");
}
else {
	//if the login works, FB sends a 302 redirect to the home page; if not, it sends a redirect back to the login page
	if ($req->getResponseCode() == '302' && $req->getResponseHeader('Location') == 'http://www.facebook.com/home.php?') echo "\n\nLogged in to Facebook\n\n";
	else {
		//login failed: dump the request, the status code and the response headers, then stop
		echo $req->_buildRequest();
		echo "\n\n";
		echo $req->getResponseCode();
		echo "\n\n";
		print_r($req->getResponseHeader());
		die;
	}
}
 
//carry over whatever cookies the login response handed back
$cookies = $req->getResponseCookies();
foreach ($cookies as $cookie) {
	$req->addCookie($cookie['name'], $cookie['value']);
}

//special FB cookie again, with a fresh timestamp
$req->addCookie('cavalry_transit_start_time', time() . '000');

//same object, third use: back to a GET, pointed at the home page, with the login form fields dropped
$req->setURL('http://www.facebook.com/home.php?');
$req->setMethod(HTTP_REQUEST_METHOD_GET);
$req->clearPostData();

//no argument to sendRequest() this time, so the body is kept for parsing afterwards
$homeResult = $req->sendRequest();
if (PEAR::isError($homeResult)) {
	die('Error getting Facebook home');
}

//anything other than a 200: dump the request, the status code and the response headers, then stop
if ($req->getResponseCode() != '200') {
	echo $req->_buildRequest();
	echo "\n\n";
	echo $req->getResponseCode();
	echo "\n\n";
	print_r($req->getResponseHeader());
	die;
}

echo "\n\nGot Facebook home\n\n";
 
//get the HTML that came back
$html = $req->getResponseBody();

//use some search magic to pull out something
//notification count: the digits after the strong tag inside the presence_notifications_count span
//(the "." stands in for the "<" of the strong tag); falls back to '0' when nothing is found
preg_match('/span id="presence_notifications_count">.strong>(\d+)/i', $html, $matches);
$count = empty($matches[1]) ? '0' : $matches[1];

//first story: the contents of the first h3 with the UIIntentionalStory_Message class
//the ".?" pieces allow an optional escaping backslash before quotes and before the slash of the closing tag
//only touch $matches[1] when the pattern actually matched; otherwise the story is left empty
$story = '';
if (preg_match('/h3 class=.?"UIIntentionalStory_Message.?".*?>(.*?)<.?\/h3>/i', $html, $matches)) {
	//replace tags, literal backslash-n sequences and runs of whitespace with a single space
	$story = preg_replace('/(<.*?>)|\\\n|\s{2,}/', ' ', $matches[1]);
}
echo "\n\nYou have $count notifications. First story on your home page: $story\n\n";
 
//one last request, let's log out
//look for the link whose text is "Logout"; [^"]* keeps the capture inside that one href attribute
if (preg_match('/a href="([^"]*)">Logout<\/a>/i', $html, $matches)) {
	$req->setURL($matches[1]);
	//send the request exactly once and check the result of that same call
	if (PEAR::isError($req->sendRequest())) {
		die('Error logging out');
	}
	else {
		echo "\n\nLogged out\n\n";
	}
}
?>

#! /opt/local/bin/php
<?php
/*
Sample code for HTTP_Request
Logs into Facebook and grabs the first story on your home page
This code fetches 3 pages, all with little quirks so is a good example of usage
(a fourth request at the very end logs out again)
First page is the Facebook home page, just a normal GET request, with one cookie set
Second page is the login form, this is a POST and we also add on cookies that we got in the first request
We check the response headers to make sure that things were successful
Third page is the home page, which is another plain old GET request with new cookies added on, and the POST data removed
The response body is saved from the third page, and some stuff is parsed out of it; code like that relies on a page not changing its structure too often, so is kind of a hack
*/

//HTTP_Request raises strict errors because it's old code written for PHP 4,
//so error reporting is switched off; $err holds the previous reporting level
$err = error_reporting(0);

//setup
$username = 'facebook@example.com';  //facebook email address
$password = 'password';  //facebook password
require_once "HTTP/Request.php";

//set up the new instance
$req = new HTTP_Request();

//where are we going?
$req->setURL('http://www.facebook.com/');

//and how are we getting there? (POST or GET)
$req->setMethod(HTTP_REQUEST_METHOD_GET);

//pretend to be a web browser
$req->addHeader('User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.4) Gecko/2008102920 Firefox/3.0.4');

//special FB cookie
$req->addCookie('cavalry_transit_start_time', time() . '000');

//send the request and check for errors at the same time; the parameter says not to save the response body, which we don't need this time
if (PEAR::isError($req->sendRequest(false))) {
	die("Error connecting to Facebook\n\n");
}
else {
	if ($req->getResponseCode() == '200') echo "\n\nGot Facebook index page\n\n";
	else {
		//unexpected status: dump the request, the status code and the response headers, then stop
		echo $req->_buildRequest();
		echo "\n\n";
		echo $req->getResponseCode();
		echo "\n\n";
		print_r($req->getResponseHeader());
		die;
	}
}

//grab those cookies and stick them back on
$cookies = $req->getResponseCookies();
foreach($cookies as $cookie) {
	$req->addCookie($cookie['name'], $cookie['value']);
}

//special FB cookie
$req->addCookie('cavalry_transit_start_time', time() . '000');

//now we're going to post the login form; reuse the same object
$req->setURL('https://login.facebook.com/login.php?login_attempt=1');
$req->setMethod(HTTP_REQUEST_METHOD_POST);

//pretend we're coming from the previous page
$req->addHeader('Referer', 'http://www.facebook.com/');

//these are the form fields that are passed, some are hidden so we need to check the HTML carefully
//this one's already URL encoded, so set the third parameter to true
$req->addPostData('charset_test', '%E2%82%AC%2C%C2%B4%2C%E2%82%AC%2C%C2%B4%2C%E6%B0%B4%2C%D0%94%2C%D0%84', true);
$req->addPostData('locale', 'en_US');
$req->addPostData('email', $username);
$req->addPostData('pass', $password);
$req->addPostData('pass_placeholder', 'Password');

//send it and check for errors at the same time; passing false says not to save the
//response body, which we don't need this time (only the status code and headers are checked)
if (PEAR::isError($req->sendRequest(false))) {
	die("\n\nError logging in to Facebook\n\n");
}
else {
	//if the login works, FB sends a 302 redirect to the home page; if not, it sends a redirect back to the login page
	if ($req->getResponseCode() == '302' && $req->getResponseHeader('Location') == 'http://www.facebook.com/home.php?') echo "\n\nLogged in to Facebook\n\n";
	else {
		echo $req->_buildRequest();
		echo "\n\n";
		echo $req->getResponseCode();
		echo "\n\n";
		print_r($req->getResponseHeader());
		die;
	}
}

//grab any new cookies and stick them back on
$cookies = $req->getResponseCookies();
foreach($cookies as $cookie) {
	$req->addCookie($cookie['name'], $cookie['value']);
}

//special FB cookie
$req->addCookie('cavalry_transit_start_time', time() . '000');

//reuse the object again to get the home page
$req->setURL('http://www.facebook.com/home.php?');
$req->setMethod(HTTP_REQUEST_METHOD_GET);
$req->clearPostData();

//send it, but keep the body this time
if (PEAR::isError($req->sendRequest())) {
	die('Error getting Facebook home');
}
else {
	if ($req->getResponseCode() == '200') echo "\n\nGot Facebook home\n\n";
	else {
		echo $req->_buildRequest();
		echo "\n\n";
		echo $req->getResponseCode();
		echo "\n\n";
		print_r($req->getResponseHeader());
		die;
	}
}

//get the HTML that came back
$html = $req->getResponseBody();

//use some search magic to pull out something
//notification count; falls back to '0' when nothing is found
preg_match('/span id="presence_notifications_count">.strong>(\d+)/i', $html, $matches);
$count = empty($matches[1]) ? '0' : $matches[1];

//first story; only touch $matches[1] when the pattern actually matched
$story = '';
if (preg_match('/h3 class=.?"UIIntentionalStory_Message.?".*?>(.*?)<.?\/h3>/i', $html, $matches)) {
	//replace tags, literal backslash-n sequences and runs of whitespace with a single space
	$story = preg_replace('/(<.*?>)|\\\n|\s{2,}/', ' ', $matches[1]);
}
echo "\n\nYou have $count notifications. First story on your home page: $story\n\n";

//one last request, let's log out
//[^"]* keeps the capture inside the one href attribute of the Logout link
if (preg_match('/a href="([^"]*)">Logout<\/a>/i', $html, $matches)) {
	$req->setURL($matches[1]);
	//send the request exactly once and check the result of that same call
	if (PEAR::isError($req->sendRequest())) {
		die('Error logging out');
	}
	else {
		echo "\n\nLogged out\n\n";
	}
}
?>

5 Replies to “Working with HTTP_Request”

Comments are closed.