Swish-e
swishbuild Building Swish-e package
swish-
swishbuild-
swishbuild-again
Important
- Do not run swish-e as root.
swish-e configuration (spider.conf)
# spider.conf -- builds @servers, the list of sites handed to the spider.
# Each element is a hash reference carrying email, base_url, delay_sec,
# max_depth and, for every site except the public wiki, credentials.
#
# NOTE(review): the credentials string is kept in plain text in this file;
# consider restricting who is able to read it.

# Settings that are identical for every site.
my %shared = (
    email     => 'tianxc@ihep.ac.cn',
    delay_sec => '1',
);

# Login string attached to every site that carries credentials.
my $login = 'dayabay:3quarks';

# One row per site: [ base_url, max_depth, carries credentials? ].
# Row order is the order of the resulting entries in @servers.
my @sites = (
    [ 'http://dayabay.ihep.ac.cn/',                                       '1',  1 ],    # dayabay
    [ 'http://dayabay.bnl.gov/private/documents/',                        '1',  1 ],    # documents
    [ 'http://dayabay.ihep.ac.cn/cgi-bin/DocDB/ListBy?alldocs=1',         '1',  1 ],    # docdb
    [ 'http://dayabay.ihep.ac.cn/internal/',                              '1',  1 ],    # internal
    [ 'http://dayabay.ihep.ac.cn/cgi-bin/EngDB/ListBy?alldocs=1',         '1',  1 ],    # engdb
    [ 'https://wiki.bnl.gov/dayabay/index.php?title=Main_Page',           '2',  0 ],    # publicwiki
    [ 'https://wiki.bnl.gov/dayabay-private/index.php?title=Main_Page',   '2',  1 ],    # privatewiki
    [ 'http://dayabay.ihep.ac.cn/tracs/dybsvn/browser/',                  '10', 1 ],    # repository
    [ 'http://dayabay.ihep.ac.cn/tracs/dybsvn',                           '2',  1 ],    # trac
);

# Expand every row into its own hash reference.  @servers is deliberately
# left as an undeclared package variable, exactly as the spider expects.
@servers = map {
    my ( $url, $depth, $with_login ) = @{$_};
    +{
        %shared,
        base_url  => $url,
        max_depth => $depth,
        # The key is omitted entirely (not set to undef) when no login applies.
        ( $with_login ? ( credentials => $login ) : () ),
    };
} @sites;

# A config file loaded by the spider has to end with a true value.
1;
swish.conf
# Example Swish-e configuration file for indexing through the spider.
# Tell Swish-e what to index (equivalent to the -i command-line switch);
# here that is the spider program:
IndexDir spider.pl
# Pass the name of the spider's config file to the spider as its parameter:
SwishProgParameters spider.conf
# Tell Swish-e that .txt files are to use the text parser:
IndexContents TXT* .txt
# All other documents use the HTML parser:
DefaultContents HTML*
# Ask libxml2 to report any parsing errors and warnings, as well as
# any UTF-8 to 8859-1 conversion errors:
ParserWarnLevel 9
Indexing
# Create indexing_time.file, or update its timestamp, before indexing starts.
# NOTE(review): presumably a marker of when this indexing run began -- confirm how it is used.
touch indexing_time.file
# Run Swish-e with swish.conf as its configuration file (-c) and "prog" as the -S method.
/usr/local/bin/swish-e -c swish.conf -S prog