Version 1 (modified by tianxc, 16 years ago) |
---|
Swish-e
swishbuild Building Swish-e package
swish- swishbuild- swishbuild-again
Important
- Do not run swish-e as root.
swish-e configuration (spider.conf)
# spider.conf -- crawl targets for Swish-e's spider.pl.
#
# Defines the array @servers. Each element is a reference to a hash that
# describes one site to crawl: contact email, start URL, delay between
# requests (seconds), maximum link depth and, where needed, a login.
use strict;
use warnings;

# Settings that are identical for every target.
my %common = (
    email     => 'tianxc@ihep.ac.cn',
    delay_sec => '1',
);

# Login used by every target except the public wiki.
# NOTE(review): this password is stored in clear text and several of the
# targets below are plain http:// URLs -- confirm that this is acceptable
# and keep this file readable only by the indexing account.
my %login = ( credentials => 'dayabay:3quarks' );

my %dayabay = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/',
    max_depth => '1',
    %login,
);

my %documents = (
    %common,
    base_url  => 'http://dayabay.bnl.gov/private/documents/',
    max_depth => '1',
    %login,
);

my %docdb = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/cgi-bin/DocDB/ListBy?alldocs=1',
    max_depth => '1',
    %login,
);

my %engdb = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/cgi-bin/EngDB/ListBy?alldocs=1',
    max_depth => '1',
    %login,
);

my %internal = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/internal/',
    max_depth => '1',
    %login,
);

# The public wiki is the only target crawled without credentials.
my %publicwiki = (
    %common,
    base_url  => 'https://wiki.bnl.gov/dayabay/index.php?title=Main_Page',
    max_depth => '2',
);

my %privatewiki = (
    %common,
    base_url  => 'https://wiki.bnl.gov/dayabay-private/index.php?title=Main_Page',
    max_depth => '2',
    %login,
);

# The repository browser is followed much deeper than the other targets.
my %repository = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/tracs/dybsvn/browser/',
    max_depth => '10',
    %login,
);

my %trac = (
    %common,
    base_url  => 'http://dayabay.ihep.ac.cn/tracs/dybsvn',
    max_depth => '2',
    %login,
);

# @servers must stay a package variable (not "my") so that the program
# loading this file can see it. The order below is the original order.
our @servers = (
    \%dayabay,
    \%documents,
    \%docdb,
    \%internal,
    \%engdb,
    \%publicwiki,
    \%privatewiki,
    \%repository,
    \%trac,
);

# A config file loaded by Perl must end with a true value.
1;
swish.conf
# Example configuration file

# Tell Swish-e what to index (equivalent to the -i command-line switch):
IndexDir spider.pl

# And pass the name of spider config file to the spider:
SwishProgParameters spider.conf

# Tell Swish-e that .txt files are to use the text parser:
IndexContents TXT* .txt

# Otherwise, use the HTML parser:
DefaultContents HTML*

# Ask libxml2 to report any parsing errors and warnings or
# any UTF-8 to 8859-1 conversion errors:
ParserWarnLevel 9
Indexing
# Update the timestamp of indexing_time.file (presumably a marker of when
# indexing last ran -- confirm), then run the indexer with swish.conf,
# using the external-program ("prog") input method.
touch indexing_time.file
/usr/local/bin/swish-e -c swish.conf -S prog