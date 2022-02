browser

[Node.js] Browse URLs with cookies; that is, you can scrape pages that require authentication!

Installation

git clone git://github.com/shinout/browser.git OR npm install browser

Features

automatic cookie management

easy asynchronous handling with Junjo.js

Usage

helloworld (one-time access)

var browser = require ( "browser" ); browser.browse( "shinout.net" , function ( err, out ) { console .log(out.result); });

helloworld2 (using object)

var browser = require ( "browser" ); var $b = new browser(); $b.browse( 'https://accounts.google.com/Login' ); $b.on( "end" , function (err, out) { console.log(out.url, out.result, out.responseHeaders); }); $b.run();

login sample (requires jquery)

var userdata = { email: "XXXXXX@gmail.com" , pass : "XXXXXXXX" }; var $b = new browser(); $b.submit({ from : 'https://accounts.google.com/Login' , selector: "#gaia_loginform" , data : { Email : userdata.email, Passwd : userdata.pass } }); $b.browse( 'https://mail.google.com/mail/u/0/?ui=html&zy=d' ) .after(); $b.on( "end" , function (err, out) { console.log(out.url, out.result, out.responseHeaders); });

login sample2 (manually doing what $b.submit() does)

var userdata = { email: "XXXXXX@gmail.com" , pass : "XXXXXXXX" }; var browser = require ( "browser" ); var $b = new browser(); $b.browse( 'login' , 'https://accounts.google.com/Login' , {debug: true }); $b.browse( function (err, out) { var jsdom = require ( "jsdom" ).jsdom; var jquery = require ( "jquery" ); var window = jsdom(out.result).createWindow(); var $ = jquery.create(window); var postdata = { Email : userdata.email, Passwd : userdata.pass }; var url = $( "#gaia_loginform" ).attr( "action" ); $( "input" ).each( function (k, el) { var $el = $(el); var name = $el.attr( "name" ), type = $el.attr( "type" ), val = $el.val(); if (type == "hidden" || type == "submit" ) postdata[name] = val; }); return [url, { data : postdata, method: "POST" }]; }) .after( "login" ); $b.browse( 'https://mail.google.com/mail/u/0/?ui=html&zy=d' ) .after(); $b.on( "end" , function (err, out) { console.log(out.result); }); $b.run();

options object

The options object passed to $b.browse() has the same format as u2r options. See u2r for details. The following are the common options.

data : (object) key-value pairs to pass to server

method : HTTP method (GET|POST|PUT|DELETE|HEAD). default: GET

All the other values below are generated automatically from the URL.

host

protocol

path

port

body : options.data in query string format

keys of out object

result : response data (Buffer or String)

statusCode

location

responseHeaders

cookies : set-cookie headers

url : browsed url

Feel free to contact @shinout!