Home

Script: pagetitle.py

Adds HTML page titles to http:// urls in your message. (for WeeChat ≥ 0.3.0)
Author: xororand, version 0.5, Public_domain — added: 2008-06-17, updated: 2009-05-02
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
# pagetitle plugin for weechat-0.3.0
#
#  /pt http://tech.slashdot.org/tech/08/11/12/199215.shtml
#  <user> http://tech.slashdot.org/tech/08/11/12/199215.shtml
#		 ('Slashdot | Microsoft's "Dead Cow" Patch Was 7 Years In the Making')
#
# xororand @ irc://irc.freenode.net/#weechat
#
# 2009-05-02, FlashCode <flashcode@flashtux.org>:
#     version 0.5: sync with last API changes

import htmllib
import re
import socket
import sys
import urllib2

# Titles longer than this many characters are cut and suffixed with " [...]".
limit_title_length = 100
# When true, a "retrieving ..." line is printed to the buffer for every URL.
debug = True

# Send a custom User-agent with every request.  The opener has to be
# registered through install_opener() for urllib2.urlopen() to use it;
# assigning it to urllib2._urlopener has no effect because urllib2 never
# reads that attribute.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0 (weechat/pagetitle)')]
urllib2.install_opener(opener)

# set a short timeout to avoid freezing weechat [seconds]
socket.setdefaulttimeout(5)

# An URL is "http://" or "https://" followed by everything up to the next space.
regex_url = re.compile("""https?://[^ ]+""")

def unescape(s): #{{{
	"""Unescape HTML entities"""
	p = htmllib.HTMLParser(None)
	p.save_bgn()
	p.feed(s)
	return p.save_end() #}}}

def getPageTitle(url):
	"""Retrieve the HTML <title> from a webpage"""

	try:
		u = urllib2.urlopen(url)
	except urllib2.HTTPError, e:
		raise NameError(str(e))
	except urllib2.URLError, e:
		raise NameError(str(e))

	info = u.info()
	try:
		content_type = info['Content-Type']
		if not re.match(".*/html.*",content_type):
			return ""
	except:
		return ""

	head = u.read(8192)
	head = re.sub("[\r\n\t ]"," ",head)

	title = re.search('(?i)\<title\>(.*?)\</title\>', head)
	if title:
		title = title.group(1)
		return unescape(title)
	else:
		return ""

# /pt http://foo
def on_pagetitle(data, buffer, args):
	if len(args) == 0:
		return weechat.WEECHAT_RC_ERROR

	msg = args

	def urlReplace(match):
		url = match.group()
		try:
			if debug:
				weechat.prnt(buffer, "pagetitle: retrieving '%s'" % url)
			title = getPageTitle(url)
			if len(title) > limit_title_length:
				title = "%s [...]" % title[0:limit_title_length]
			url = "%s ('%s')" % (url, title)
		except NameError, e:
			weechat.prnt(buffer, "pagetitle: URL: '%s', Error: '%s'" % (url, e))
		return url

	msg = regex_url.sub(urlReplace, msg)
	weechat.command(buffer, "/say %s" % msg)

	return weechat.WEECHAT_RC_OK

# Register plugin
import weechat

# Register the script under the name 'pagetitle'; the two trailing
# arguments are passed as empty strings.
weechat.register(
	'pagetitle',
	'xororand',
	'0.5',
	'GPL3',
	"""Adds HTML titles to http:// urls in your message.""",
	"",
	"",
)

# Help text attached to the /pt command.
desc = (
	"Sends a message to the current buffer and adds HTML titles to http:// URLs.\n"
	"Example: /pt check this out: http://xkcd.com/364/\n"
	"<you> check this out: http://xkcd.com/364/ (xkcd - A webcomic of romance, sarcasm, math and language)"
)

# Hook the /pt command; the callback is given by name ('on_pagetitle').
weechat.hook_command(
	'pt',
	desc,
	'message',
	'message containing an URL',
	'',
	'on_pagetitle',
	'',
)

# vim:set ts=4 sw=4 noexpandtab nowrap foldmethod=marker: