Wednesday, July 2, 2014

Python script to grab the title of a page

#!/usr/bin/env python
import urllib2
import sys
from BeautifulSoup import BeautifulSoup
# Expects a URL such as "http://example.com" as the only command-line argument.

try:
    urllib2.urlopen(sys.argv[1])
except urllib2.HTTPError, e:
    print sys.argv[1], '--- HTTPERROR'
    quit()


soup = BeautifulSoup(urllib2.urlopen(sys.argv[1]))

if soup.title:
  print sys.argv[1], "--- ", soup.title.string
else:
  print sys.argv[1], '--- NULL'

No comments:

Post a Comment