/
spider_task_factory.php
94 lines (81 loc) · 2.27 KB
/
spider_task_factory.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
<?php
/**
* @author bigbigant
*/
use Comos\Qpm\Process\Runnable;
require __DIR__ . '/bootstrap.inc.php';
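/**
 * Task factory. It must implement the fetchTask method.
 * On a normal call fetchTask returns a task object (a SpiderTask); when there
 * is no more input it throws Comos\Qpm\Supervision\StopSignal instead.
 */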
class SpiderTaskFactory
{
    /**
     * Path of the task list, exactly as passed to the constructor.
     *
     * Declared explicitly so the assignment in the constructor does not
     * create a dynamic property (deprecated since PHP 8.2).
     *
     * @var string
     */
    private $_input;

    /**
     * Read handle on the task list, opened by the constructor.
     *
     * @var resource
     */
    private $_fh;

    /**
     * Opens the task list for reading.
     *
     * @param string $input Path handed to fopen(); one crawl target per line.
     *
     * @throws Exception When fopen() fails.
     */
    public function __construct($input)
    {
        $this->_input = $input;
        $this->_fh = fopen($input, 'r');
        if ($this->_fh === false) {
            throw new Exception('fopen failed:' . $input);
        }
    }

    /**
     * Returns a task for the next non-blank line of the input.
     *
     * Lines are trimmed; blank lines are skipped. Reading stops at end of
     * file, at a line that is exactly "END" after trimming, or when fgets()
     * reports a failure.
     *
     * @return SpiderTask Task built from the next target line.
     *
     * @throws Comos\Qpm\Supervision\StopSignal When no further task is available.
     */
    public function fetchTask()
    {
        while (!feof($this->_fh)) {
            $raw = fgets($this->_fh);
            if ($raw === false) {
                // fgets() returns false at EOF and on read errors. Stop here
                // instead of looping again: after an error feof() may never
                // become true, which would spin forever.
                break;
            }

            $line = trim($raw);
            if ($line === 'END') {
                break;
            }

            // Compare against '' rather than using empty(), which would also
            // discard a line containing just "0".
            if ($line !== '') {
                return new SpiderTask($line);
            }
        }

        throw new Comos\Qpm\Supervision\StopSignal();
    }
}
/**
 * Task class whose work is executed in a child process.
 * It must implement the Comos\Qpm\Process\Runnable interface.
 */
class SpiderTask implements Comos\Qpm\Process\Runnable
{
    /**
     * Target to download, as read from the task list.
     *
     * @var string
     */
    private $_target;

    /**
     * @param string $target Target passed to file_get_contents() by run().
     */
    public function __construct($target)
    {
        $this->_target = $target;
    }

    /**
     * The part that is executed in the child process: downloads the target
     * and stores the response body in a local file.
     *
     * @throws Exception When the target cannot be fetched or the result
     *                   cannot be written to disk.
     */
    public function run()
    {
        // Warnings are suppressed because failure is reported via the
        // exception below.
        $content = @file_get_contents($this->_target);
        if ($content === false) {
            throw new Exception('fail to crawl url:' . $this->_target);
        }

        $path = $this->getLocalFilename();
        // file_put_contents() returns the byte count (possibly 0) on success
        // and false on failure, so the comparison has to be strict.
        if (file_put_contents($path, $content) === false) {
            throw new Exception('fail to save url:' . $this->_target . ' to ' . $path);
        }
    }

    /**
     * Builds the output path for this target.
     *
     * "/" becomes "~" and ":" becomes "_" in the target, a "-YmdHis"
     * timestamp is appended, and the file is placed in the "_spider"
     * directory next to this script with an ".html" extension.
     *
     * @return string Path of the file to write.
     */
    private function getLocalFilename()
    {
        $filename = str_replace(array('/', ':'), array('~', '_'), $this->_target);
        $filename .= '-' . date('YmdHis');

        return __DIR__ . '/_spider/' . $filename . '.html';
    }
}
// Data source: the first command-line argument when one is given, otherwise
// spider_task_factory_data.txt next to this script.
if (isset($argv[1])) {
    $input = $argv[1];
} else {
    $input = __DIR__ . '/spider_task_factory_data.txt';
}
$spiderTaskFactory = new SpiderTaskFactory($input);

$config = array();
// The taskFactory object and its factory method.
$config['factory'] = array($spiderTaskFactory, 'fetchTask');
// Maximum number of concurrent tasks: 3.
$config['quantity'] = 3;

// Start the Supervisor.
Comos\Qpm\Supervision\Supervisor::taskFactoryMode($config)->start();