WeeChat scripts
Script: announce_url_title.py
Back to scripts |
Download
# -*- coding: utf-8 -*-
#
# Copyright (c) 2009 by xt <xt@bash.no>
# Borrowed parts from pagetitle.py by xororand
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
#
# If someone posts a URL in a configured channel,
# this script will post back the page title.
#
#
# History:
# 2009-12-08, Chaz6
# version 0.5: only announce for specified channels
# 2009-12-08, Chaz6 <chaz@chaz6.com>
# version 0.4: add global option
# 2009-12-08, xt
# version 0.3: option for public announcing or not
# 2009-12-07, xt <xt@bash.no>
# version 0.2: don't reannounce the same URLs for a time
# add optional prefix and suffix
# 2009-12-02, xt
# version 0.1: initial
import weechat
w = weechat
import re
import htmllib
from time import time as now
# Script metadata passed to w.register() when the script is loaded.
SCRIPT_NAME = 'announce_url_title'
SCRIPT_AUTHOR = 'xt <xt@bash.no>'
SCRIPT_VERSION = '0.5'
SCRIPT_LICENSE = 'GPL'
SCRIPT_DESC = 'Look up URL title'

# Default values for the plugin options; every value is kept as a string.
settings = {
    # comma separated list of buffers
    'buffers': 'freenode.#testing,',
    'title_max_length': '100',
    # comma separated list of strings in url to ignore
    'url_ignore': '',
    # 5 minutes delay
    'reannounce_wait': '5',
    'prefix': '',
    'suffix': '',
    # print it or msg the buffer
    'announce_public': 'off',
    # whether to enable for all buffers
    'global': 'off',
}

# Building blocks of the URL pattern.
# One decimal octet (0-255) and a dotted quad made of four of them.
octet = r'(?:2(?:[0-4]\d|5[0-5])|1\d\d|\d{1,2})'
ipAddr = octet + r'(?:\.' + octet + r'){3}'

# Base domain regex off RFC 1034 and 1738
label = r'[0-9a-z][-0-9a-z]*[0-9a-z]?'
domain = label + r'(?:\.' + label + r')*\.[a-z][-0-9a-z]*[a-z]?'

# scheme://host[:port][/path]; the path stops at whitespace, "]", ")" or ">".
urlRe = re.compile(
    r'(\w+://(?:' + domain + r'|' + ipAddr + r')(?::\d+)?(?:/[^\])>\s]*)?)',
    re.IGNORECASE)

# State shared between the callbacks.
url_hook_process = ''   # handle of the fetch currently in flight, '' if none
buffer_name = ''        # name of the buffer the current fetch belongs to
url_stdout = ''         # output collected so far from the fetch process
urls = {}               # url -> time it was last seen
def get_buffer_name(bufferp):
    """Return the "name" string property of the WeeChat buffer *bufferp*."""
    return w.buffer_get_string(bufferp, "name")
def unescape(s):
    """Unescape HTML entities in *s* and collapse runs of whitespace.

    Uses ``htmllib`` where it exists (Python 2).  On interpreters without
    it, falls back to ``html.unescape`` and applies the same whitespace
    collapsing that ``HTMLParser.save_end()`` performs, so both paths
    return single-space separated text.
    """
    try:
        import htmllib
    except ImportError:
        # htmllib was removed in Python 3; html.unescape is its stdlib
        # replacement for entity decoding.
        from html import unescape as html_unescape
        return ' '.join(html_unescape(s).split())

    p = htmllib.HTMLParser(None)
    p.save_bgn()
    p.feed(s)
    # Without close() the parser keeps back trailing text that could still
    # turn into an entity (e.g. the "&D" of "R&D"), and it would be lost.
    p.close()
    return p.save_end()
def _shell_safe_url(url):
    """Percent-encode the characters of *url* that could break out of the
    quoting used when the URL is embedded in the fetch command line."""
    # The URL ends up inside a single-quoted Python string which itself sits
    # inside a double-quoted command argument, so quotes, backslash,
    # backtick and "$" must never appear literally.
    for char in ('\\', '"', "'", '`', '$'):
        url = url.replace(char, '%%%02X' % ord(char))
    return url


def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message):
    """Print-hook callback: start a title lookup for each new URL in *message*.

    The message is only processed when the 'global' option is 'on' or the
    name of *buffer* is listed (case-insensitively) in the 'buffers' option.
    URLs containing one of the 'url_ignore' fragments, and URLs already
    recorded in ``urls``, are skipped.  For every remaining URL a child
    process is started via ``w.hook_process`` whose output is handled by
    ``url_process_cb``.  Only one fetch is tracked at a time: starting a new
    one unhooks the previous one.

    The other parameters are supplied by the hook and are not used here.
    Always returns ``w.WEECHAT_RC_OK``.
    """
    global url_hook_process, buffer_name, url_stdout, urls

    msg_buffer_name = get_buffer_name(buffer)

    # Skip buffers the script is not enabled for.
    if w.config_get_plugin('global') != 'on':
        wanted = msg_buffer_name.lower()
        # Strip entries so "a, b" works and the empty entry produced by a
        # trailing comma is never treated as a buffer name.
        enabled = [name.strip().lower()
                   for name in w.config_get_plugin('buffers').split(',')
                   if name.strip()]
        if wanted not in enabled:
            return w.WEECHAT_RC_OK
    buffer_name = msg_buffer_name

    ignorelist = [part.strip()
                  for part in w.config_get_plugin('url_ignore').split(',')
                  if part.strip()]

    for url in urlRe.findall(message):
        ignore = False
        for ignore_part in ignorelist:
            if ignore_part in url:
                ignore = True
                w.prnt('', '%s: Found %s in URL: %s, ignoring.' % (SCRIPT_NAME, ignore_part, url))
                break
        if ignore:
            continue

        # Already seen recently: do not announce again until purge_cb drops it.
        if url in urls:
            continue
        urls[url] = now()

        if url_hook_process != "":
            w.unhook(url_hook_process)
            url_hook_process = ""
        # Always start from an empty buffer so output of an earlier fetch can
        # never be mistaken for this page.
        url_stdout = ""

        # Read 8192
        # NOTE: the command uses urllib2 and the print statement, so the
        # "python" executable it runs must be a Python 2 interpreter.
        url_hook_process = w.hook_process(
            "python -c \"import urllib2; print urllib2.urlopen('"
            + _shell_safe_url(url) + "').read(8192)\"",
            30 * 1000, "url_process_cb", "")

    return w.WEECHAT_RC_OK
def url_process_cb(data, command, rc, stdout, stderr):
    """Callback parsing html for title.

    Called for the process started in ``url_print_cb``.  Each call appends
    *stdout* to the shared ``url_stdout`` buffer.  Once *rc* is non-negative
    the collected output is searched for a ``<title>`` element; if one is
    found its text is unescaped, truncated to 'title_max_length' characters
    and wrapped in the 'prefix' / 'suffix' options.  The result is sent with
    ``/msg`` when 'announce_public' is 'on' and the buffer is listed in
    'buffers'; otherwise it is printed locally to the buffer.

    Always returns ``w.WEECHAT_RC_OK``.
    """
    global url_hook_process, buffer_name, url_stdout

    if stdout != "":
        url_stdout += stdout

    # A negative rc means there is no final return code yet (see the WeeChat
    # hook_process documentation): keep collecting output.
    if int(rc) < 0:
        return w.WEECHAT_RC_OK

    # The fetch is over: take the output and reset the shared state so a
    # later fetch never sees this page's data.
    page = url_stdout
    url_stdout = ''
    url_hook_process = ''

    head = re.sub("[\r\n\t ]", " ", page)
    # Accept attributes on the opening tag, e.g. <title id="x">.
    match = re.search(r'(?i)<title(?:\s[^>]*)?>(.*?)</title>', head)
    if not match:
        return w.WEECHAT_RC_OK

    title = unescape(match.group(1))
    if not title:
        # Nothing useful to announce.
        return w.WEECHAT_RC_OK

    max_len = int(w.config_get_plugin('title_max_length'))
    if len(title) > max_len:
        title = "%s [...]" % title[0:max_len]

    # Buffer names look like "<server>.<channel>"; the channel part may
    # itself contain dots, so split on the first dot only.
    server, _, channel = buffer_name.partition('.')
    output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')

    # Public announcing is limited to buffers named in the 'buffers' option,
    # even when the 'global' option enabled the lookup.
    announce = False
    if w.config_get_plugin('announce_public') == 'on':
        wanted = buffer_name.lower()
        for active_buffer in w.config_get_plugin('buffers').split(','):
            if active_buffer.strip().lower() == wanted:
                announce = True
                break  # a buffer listed twice must not be messaged twice

    if announce:
        w.command('', '/msg -server %s %s %s' % (server, channel, output))
    else:
        w.prnt(w.buffer_search('', buffer_name), 'URL title\t' + output)

    return w.WEECHAT_RC_OK
def purge_cb(*args):
    """Purge the url list on configured intervals.

    Removes every entry of ``urls`` whose timestamp is older than the
    'reannounce_wait' option (minutes), so those URLs can be announced
    again.  The arguments passed by the timer hook are ignored.

    Always returns ``w.WEECHAT_RC_OK``.
    """
    if not urls:
        return w.WEECHAT_RC_OK

    t_now = now()
    # Read the option once instead of once per URL.
    max_age = int(w.config_get_plugin('reannounce_wait')) * 60

    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over it directly raises RuntimeError on Python 3.
    for url in list(urls):
        if (t_now - urls[url]) > max_age:
            del urls[url]
    return w.WEECHAT_RC_OK
if __name__ == "__main__":
    # Only set up options and hooks when registration returned a true value.
    if w.register(SCRIPT_NAME, SCRIPT_AUTHOR, SCRIPT_VERSION, SCRIPT_LICENSE,
                  SCRIPT_DESC, "", ""):
        # Set default settings
        # items() instead of iteritems(): same result here, and it also
        # exists on Python 3.
        for option, default_value in settings.items():
            if not w.config_is_set_plugin(option):
                w.config_set_plugin(option, default_value)

        # Call url_print_cb for printed lines containing "://".
        w.hook_print("", "", "://", 1, "url_print_cb", "")

        # Run purge_cb every 'reannounce_wait' minutes.  The interval is
        # computed once here, so a later change of the option only affects
        # the age check inside purge_cb, not how often it runs.
        w.hook_timer(
            int(w.config_get_plugin('reannounce_wait')) * 1000 * 60,
            0,
            0,
            "purge_cb",
            '')