Skip to content Skip to sidebar Skip to footer

How To Return Html Of A Page Using Robobrowser

I'm experimenting with http://robobrowser.readthedocs.org/en/latest/readme.html, a new python library based on the beautiful soup library. I'm trying to test it out by opening an h

Solution 1:

Use the browser's `parsed` property, which holds the parsed HTML of the page you just opened.

Code:

from robobrowser import RoboBrowser

# Open the page, then print the document parsed from the response.
url = "http://www.google.com"
br = RoboBrowser(history=True)
br.open(url)
# print() with parentheses works on both Python 2 and Python 3; the bare
# `print br.parsed` statement is a SyntaxError on Python 3.
print(br.parsed)

Result:

<!DOCTYPE html><htmlitemscope=""itemtype="http://schema.org/WebPage"lang="en-PH"><head><metacontent="/images/google_favicon_128.png"itemprop="image"/><title>Google</title><script>(function(){
window.google={kEI:"-RFgU9LgJsq6uATKqYGoDg",getEI:function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI},https:function(){return"https:"==window.location.protocol},kEXPI:"17259,4000116,4007661,4007830,4008142,4009033,4009641,4010806,4010858,4010899,4011228,4011258,4011679,4012373,4012504,4012860,4013374,4013414,4013591,4013723,4013758,4013787,4013823,4013941,4013967,4013979,4014016,4014033,4014093,4014431,4014515,4014636,4014671,4014810,4014813,4014828,4014991,4015119,4015155,4015234,4015260,4015519,4015550,4015587,4015635,4015638,4015639,4015772,4015901,4016005,4016042,4016127,4016309,4016363,4016367,4016371,4016391,4016425,4016452,4016466,4016479,4016487,4016638,4016703,4016730,4016786,4016800,4016824,4016855,4016933,4016969,4016978,4017042,4017079,4017177,4017202,4017206,4017280,4017285,4017294,4017301,4017336,4017556,8300015,8300017,8500165,8500223,8500240,8500252,8500255,8500283,8500306,8500313,8500325,8500332,8500349,10200002,10200012,10200029,10200038,10200040,10200048,10200053,10200055,10200066,10200083,10200120,10200134,10200136,10200155,10200157,10200159,10200164,10200195,10200211,10200215,10200221,10200224,10200231,10200236,10200242,10200246,10200252",kCSI:{e:"17259,4000116,4007661,4007830,4008142,4009033,4009641,4010806,4010858,4010899,4011228,4011258,4011679,4012373,4012504,4012860,4013374,4013414,4013591,4013723,4013758,4013787,4013823,4013941,4013967,4013979,4014016,4014033,4014093,4014431,4014515,4014636,4014671,4014810,4014813,4014828,4014991,4015119,4015155,4015234,4015260,4015519,4015550,4015587,4015635,4015638,4015639,4015772,4015901,4016005,4016042,4016127,4016309,4016363,4016367,4016371,4016391,4016425,4016452,4016466,4016479,4016487,4016638,4016703,4016730,4016786,4016800,4016824,4016855,4016933,4016969,4016978,4017042,4017079,4017177,4017202,4017206,4017280,4017285,4017294,4017301,4017336,4017556,8300015,8300017,8500165,8500223,8500240,8500252,8500255,8500283,8500306,8500313,8500325,850033
2,8500349,10200002,10200012,10200029,10200038,10200040,10200048,10200053,10200055,10200066,10200083,10200120,10200134,10200136,10200155,10200157,10200159,10200164,10200195,10200211,10200215,10200221,10200224,10200231,10200236,10200242,10200246,10200252",ei:"-RFgU9LgJsq6uATKqYGoDg"},authuser:0,ml:function(){},kHL:"en",time:function(){return(newDate).getTime()},log:function(a,b,c,h,k){var d=
newImage,f=google.lc,e=google.li,g="";d.onerror=d.onload=d.onabort=function(){delete f[e]};f[e]=d;c||-1!=b.search("&ei=")||(g="&ei="+google.getEI(h));c=c||"/"+(k||"gen_204")+"?atyp=i&ct="+a+"&cad="+b+g+"&zx="+google.time();a=/^http:/i;a.test(c)&&google.https()?(google.ml(Error("GLMM"),!1,{src:c}),delete f[e]):(d.src=c,google.li=e+1)},lc:[],li:0,y:{},x:function(a,b){google.y[a.id]=[a,b];return!1},load:function(a,b,c){google.x({id:a+l++},function(){google.load(a,b,c)})}};var l=0;})();
(function(){google.sn="webhp";google.timers={};google.startTick=function(a,b){var f=google.time();google.timers[a]={t:{start:f},bfr:!!b};};google.tick=function(a,b,f){google.timers[a]||google.startTick(a);google.timers[a].t[b]=f||google.time()};google.startTick("load",!0);
try{}catch(d){}})();
var _gjwl=location;function_gjuc(){var a=_gjwl.href.indexOf("#");if(0<=a&&(a=_gjwl.href.substring(a),0<a.indexOf("&q=")||0<=a.indexOf("#q="))&&(a=a.substring(1),-1==a.indexOf("#"))){for(var d=0;d<a.length;){var b=d;"&"==a.charAt(b)&&++b;var c=a.indexOf("&",b);-1==c&&(c=a.length);b=a.substring(b,c);if(0==b.indexOf("fp="))a=a.substring(0,d)+a.substring(c,a.length),c=d;elseif("cad=h"==b)return0;d=c}_gjwl.href="/search?"+a+"&cad=h";return1}return0}
function_gjh(){!_gjuc()&&window.google&&google.x&&google.x({id:"GJH"},function(){google.nav&&google.nav.gjh&&google.nav.gjh()})};
window._gjh&&_gjh();</script><style>#gbar,#guser{font-size:13px;padding-top:1px!important;}#gbar{height:22px}#guser{padding-bottom:7px!important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c!important}.gbi.gb4{color:#dd8e27!important}.gbf.gb4{color:#900!important}</style><style>body,td,a,p,.h{font-family:arial,sans-serif}body{margin:0;overflow-y:scroll}#gog{padding:3px8px0}td{line-height:.8em}.gac_mtd{line-height:17px}form{margin-bottom:20px}.h{color:#36c}.q{color:#00c}.tstd{padding:0}.ts{border-collapse:collapse}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px arial,sans-serif}.gsfs{font:17px arial,sans-serif}.ds{display:inline-box;display:inline-block;margin:3px04px;margin-left:4px}input{font-family:inherit}a.gb1,a.gb2,a.gb3,a.gb4{color:#11c!important}body{background:#fff;color:black}a{color:#11c;text-decoration:none}a:hover,a:active{text-decoration:underline}.fla{color:#36c}a:visited{color:#551a8b}a.gb1,a.gb4{text-decoration:underline}a.gb3:hover{text-decoration:none}#gheada.gb2:hover{color:#fff!important}.sblc{padding-top:5px}.sblca{display:block;margin:2px0;margin-left:13px;font-size:11px}.lsbb{background:#eee;border:solid 1px;border-color:#ccc#999#999#ccc;height:30px}.lsbb{display:block}.ftl,#flla{display:inline-block;margin:012px}.lsb{background:url(/images/srpr/nav_logo80.png) 0 -258px repeat-x;border:none;color:#000;cursor:pointer;height:30px;margin:0;outline:0;font:15px arial,sans-serif;vertical-align:top}.lsb:active{background:#ccc}.lst:focus{outline:none}#addlanga{padding:03px}</style><script></script></head><bodybgcolor="#fff"><script>(function(){var src='/images/nav_logo176.png';var iesg=false;document.body.onload = function(){window.n && window.n();if 
(document.images){newImage().src=src;}
if (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}
}
})();</script><textareaid="csi"style="display:none"></textarea><divid="mngb"><divid="gbar"><nobr><bclass="gb1">Search</b><aclass="gb1"href="http://www.google.com.ph/imghp?hl=en&amp;tab=wi">Images</a><aclass="gb1"href="http://maps.google.com.ph/maps?hl=en&amp;tab=wl">Maps</a><aclass="gb1"href="https://play.google.com/?hl=en&amp;tab=w8">Play</a><aclass="gb1"href="http://www.youtube.com/?gl=PH&amp;tab=w1">YouTube</a><aclass="gb1"href="http://news.google.com.ph/nwshp?hl=en&amp;tab=wn">News</a><aclass="gb1"href="https://mail.google.com/mail/?tab=wm">Gmail</a><aclass="gb1"href="https://drive.google.com/?tab=wo">Drive</a><aclass="gb1"href="http://www.google.com.ph/intl/en/options/"style="text-decoration:none"><u>More</u> »</a></nobr></div><divid="guser"width="100%"><nobr><spanclass="gbi"id="gbn"></span><spanclass="gbf"id="gbf"></span><spanid="gbe"></span><aclass="gb4"href="http://www.google.com.ph/history/optout?hl=en">Web History</a> | <aclass="gb4"href="/preferences?hl=en">Settings</a> | <aclass="gb4"href="https://accounts.google.com/ServiceLogin?hl=en&amp;continue=http://www.google.com.ph/%3Fgfe_rd%3Dcr%26ei%3D-BFgU62INOmNiAeYnYDoAg"id="gb_70"target="_top">Sign in</a></nobr></div><divclass="gbh"style="left:0"></div><divclass="gbh"style="right:0"></div></div><center><brclear="all"id="lgpd"/><divid="lga"><divstyle="padding:28px 0 3px"><divalign="left"id="hplogo"onload="window.lol&amp;&amp;lol()"style="height:110px;width:276px;background:url(/images/srpr/logo9w.png) no-repeat"title="Google"><divnowrap=""style="color:#777;font-size:16px;font-weight:bold;position:relative;top:70px;left:218px">Philippines</div></div></div><br/></div><formaction="/search"name="f"><tablecellpadding="0"cellspacing="0"><trvalign="top"><tdwidth="25%"> </td><tdalign="center"nowrap=""><inputname="ie"type="hidden"value="ISO-8859-1"/><inputname="hl"type="hidden"value="en-PH"/><inputname="source"type="hidden"value="hp"/><divclass="ds"style="height:32px;margin:4px 
0"><inputautocomplete="off"class="lst"maxlength="2048"name="q"size="57"style="color:#000;margin:0;padding:5px 8px 0 6px;vertical-align:top"title="Google Search"value=""/></div><brstyle="line-height:0"/><spanclass="ds"><spanclass="lsbb"><inputclass="lsb"name="btnG"type="submit"value="Google Search"/></span></span><spanclass="ds"><spanclass="lsbb"><inputclass="lsb"name="btnI"onclick="if(this.form.q.value)this.checked=1; else top.location='/doodles/'"type="submit"value="I'm Feeling Lucky"/></span></span></td><tdalign="left"class="fl sblc"nowrap=""width="25%"><ahref="/advanced_search?hl=en-PH&amp;authuser=0">Advanced search</a><ahref="/language_tools?hl=en-PH&amp;authuser=0">Language tools</a></td></tr></table><inputid="gbv"name="gbv"type="hidden"value="1"/></form><divid="gac_scont"></div><divstyle="font-size:83%;min-height:3.5em"><br/><divid="als"><fontid="addlang"size="-1">Google.com.ph offered in: <ahref="http://www.google.com.ph/setprefs?sig=0_SbhOaVIheKTw2jFHRcEg8o-Evng%3D&amp;hl=tl&amp;source=homepage">Filipino</a><ahref="http://www.google.com.ph/setprefs?sig=0_SbhOaVIheKTw2jFHRcEg8o-Evng%3D&amp;hl=ceb&amp;source=homepage">Cebuano</a></font><br/><br/></div></div><spanid="footer"><divstyle="font-size:10pt"><divid="fll"style="margin:19px auto;text-align:center"><ahref="/intl/en/ads/">Advertising Programs</a><ahref="http://www.google.com.ph/intl/en/services/">Business Solutions</a><ahref="/intl/en/about.html">About Google</a><ahref="http://www.google.com.ph/setprefdomain?prefdom=US&amp;sig=0_dQ2pwXFotFQfDlj9qmDCkzdxCdA%3D"id="fehl">Google.com</a></div></div><pstyle="color:#767676;font-size:8pt">© 2013 - <ahref="/intl/en/policies/">Privacy &amp; Terms</a></p></span></center><divid="xjsd"></div><divdata-jiis="bp"id="xjsi"><script>if(google.y)google.y.first=[];(function(){functionb(a){window.setTimeout(function(){var 
c=document.createElement("script");c.src=a;document.getElementById("xjsd").appendChild(c)},0)}google.dljp=function(a){google.xjsu=a;b(a)};google.dlj=b;})();
if(!google.xjs){window._=window._||{};window._._DumpException=function(e){throw e};if(google.timers&&google.timers.load.t){google.timers.load.t.xjsls=newDate().getTime();}google.dljp('/xjs/_/js/k\x3dxjs.hp.en_US.RLLpSOAzMFM.O/m\x3dsb_he,pcc/rt\x3dj/d\x3d1/sv\x3d1/rs\x3dAItRSTOBXfxSyWrXjOGBi9e9cIs5cEBO6A');google.xjs=1;}google.pmc={"sb_he":{"agen":true,"cgen":true,"client":"heirloom-hp","dh":true,"ds":"","eqch":true,"fl":true,"host":"google.com.ph","jam":0,"jsonp":true,"msgs":{"cibl":"Clear Search","dym":"Did you mean:","lcky":"I\u0026#39;m Feeling Lucky","lml":"Learn more","oskt":"Input tools","psrc":"This search was removed from your \u003Ca href=\"/history\"\u003EWeb History\u003C/a\u003E","psrl":"Remove","sbit":"Search by image","srch":"Google Search"},"ovr":{},"pq":"","qcpw":false,"scd":10,"sce":5,"stok":"QGTPqfOgiEZ_AI3e5vphR6-NOmw"},"pcc":{}};google.y.first.push(function(){if(google.med){google.med('init');google.initHistory();google.med('history');}});if(google.j&&google.j.en&&google.j.xi){window.setTimeout(google.j.xi,0);}</script></div><script>(function(){if(google.timers&&google.timers.load.t){var b,c,d,e,g=function(a,f){a.removeEventListener?(a.removeEventListener("load",f,!1),a.removeEventListener("error",f,!1)):(a.detachEvent("onload",f),a.detachEvent("onerror",f))},h=function(a){e=(newDate).getTime();++c;a=a||window.event;a=a.target||a.srcElement;g(a,h)},k=document.getElementsByTagName("img");b=k.length;for(var l=c=0,m;l<b;++l)m=k[l],m.complete||"string"!=typeof m.src||!m.src?++c:m.addEventListener?(m.addEventListener("load",h,!1),m.addEventListener("error",
h,!1)):(m.attachEvent("onload",h),m.attachEvent("onerror",h));d=b-c;var n=function(){if(google.timers.load.t){google.timers.load.t.ol=(newDate).getTime();google.timers.load.t.iml=e;google.kCSI.imc=c;google.kCSI.imn=b;google.kCSI.imp=d;void0!==google.stt&&(google.kCSI.stt=google.stt);google.csiReport&&google.csiReport()}};window.addEventListener?window.addEventListener("load",n,!1):window.attachEvent&&
window.attachEvent("onload",n);google.timers.load.t.prt=e=(newDate).getTime()};})();
</script></body></html>
[Finished in 4.3s]

How you scrape the page from there is up to you. In any case, if you're having difficulty getting to know a new library such as this one, always read through its documentation first.

From the site itself:

parsed
    Lazily parse response content, using HTML parser specified by the browser.

Source

Alternatively, you can call `dir(br)` to list the browser object's attributes, which gives the following:

['__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_build_url', '_cursor', '_maxlen', '_states', '_traverse', '_update_state', 'back', 'find', 'find_all', 'follow_link', 'forward', 'get_form', 'get_forms', 'get_link', 'get_links', 'history', 'open', 'parsed', 'parser', 'response', 'select', 'session', 'state', 'submit_form', 'timeout', 'url']
[Finished in 5.3s]

As you can see, `parsed` appears towards the end of the list.

Hope this helps.

Post a Comment for "How To Return Html Of A Page Using Robobrowser"