158 lines
4.5 KiB
PHP
158 lines
4.5 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use GuzzleHttp\Client;
|
|
use Illuminate\Support\Carbon;
|
|
|
|
/**
 * Fetches the daily horoscope page for each of the twelve constellations
 * from astro.click108.com.tw and extracts, for every section of the page
 * (overall, love, career, income), a star rating and a description text.
 */
class ConstellationCrawler
{
    /** URL of the daily horoscope page; the query string is appended per request. */
    private $baseUrl = 'http://astro.click108.com.tw/daily_1.php';

    /** Constellation name => value sent as the `iAstro` query parameter. */
    private $constellationPageIds = [
        'Aries' => 0,
        'Taurus' => 1,
        'Gemini' => 2,
        'Cancer' => 3,
        'Leo' => 4,
        'Virgo' => 5,
        'Libra' => 6,
        'Scorpio' => 7,
        'Sagittarius' => 8,
        'Capricorn' => 9,
        'Aquarius' => 10,
        'Pisces' => 11,
    ];

    /**
     * Result key => CSS class of the section's heading <span>.
     *
     * The order matters: sections are consumed from the page sequentially,
     * so it must match the order in which they appear in the markup.
     */
    private $sections = [
        'all' => 'txt_green',
        'love' => 'txt_pink',
        'career' => 'txt_blue',
        'income' => 'txt_orange',
    ];

    /**
     * Downloads the horoscope page of one constellation for today's date.
     *
     * `http_errors` is disabled so that 4xx/5xx responses reach the status
     * check below instead of being thrown as exceptions by Guzzle; transport
     * level failures (DNS, connection refused, ...) still throw.
     *
     * @param int $pageId value of the `iAstro` query parameter
     *
     * @return string the response body, or '' when the status is not 200
     */
    private function getHtml(int $pageId = 0): string
    {
        $query = http_build_query([
            'iAstro' => $pageId,
            'iAcDay' => Carbon::now()->format('Y-m-d'),
        ]);

        $httpClient = new Client;
        $resp = $httpClient->request('GET', $this->baseUrl . '?' . $query, [
            'http_errors' => false,
        ]);

        if ($resp->getStatusCode() !== 200) {
            return '';
        }

        return $resp->getBody()->getContents();
    }

    /**
     * Splits a string at the first occurrence of a literal marker.
     *
     * @param string $html   text to search
     * @param string $marker literal (non-regex) marker to look for
     *
     * @return array|null [text before the marker, text after the marker with
     *                    leading whitespace removed], or null when the marker
     *                    does not occur
     */
    private function splitAt(string $html, string $marker)
    {
        $pos = strpos($html, $marker);
        if ($pos === false) {
            return null;
        }

        return [
            substr($html, 0, $pos),
            ltrim(substr($html, $pos + strlen($marker))),
        ];
    }

    /**
     * Extracts the text enclosed by the next $open ... $close pair.
     *
     * @param string $html  text to search
     * @param string $open  literal opening marker
     * @param string $close literal closing marker
     *
     * @return array|null [enclosed text, remaining text after $close], or
     *                    null when either marker is missing
     */
    private function extractBetween(string $html, string $open, string $close)
    {
        $afterOpen = $this->splitAt($html, $open);
        if ($afterOpen === null) {
            return null;
        }

        return $this->splitAt($afterOpen[1], $close);
    }

    /**
     * Returns the length of the first run of consecutive "★" characters.
     *
     * @param string $text heading text of a section
     *
     * @return int number of stars in the first run, 0 when there is none
     */
    private function countStars(string $text): int
    {
        $found = [];
        if (preg_match('/★+/u', $text, $found) !== 1) {
            return 0;
        }

        // Explicit encoding: the match is UTF-8 (the /u flag guarantees it),
        // so the count must not depend on the configured internal encoding.
        return mb_strlen($found[0], 'UTF-8');
    }

    /**
     * Parses one horoscope page.
     *
     * Everything before `<div class="TODAY_CONTENT">` is skipped. Then, for
     * each configured section in order, the heading paragraph (identified by
     * its span class) yields the star rating and the following `<p>...</p>`
     * yields the description.
     *
     * @param string $html raw page markup
     *
     * @return array|null map with the keys `all`, `love`, `career`, `income`
     *                    (int ratings) and `all_desc`, `love_desc`,
     *                    `career_desc`, `income_desc` (strings); null when the
     *                    container or any expected marker is missing
     */
    private function parseHtml(string $html)
    {
        $container = $this->splitAt($html, '<div class="TODAY_CONTENT">');
        if ($container === null) {
            return null;
        }

        $rest = $container[1];
        $data = [];

        foreach ($this->sections as $name => $cssClass) {
            $heading = $this->extractBetween(
                $rest,
                '<p><span class="' . $cssClass . '">',
                '</span></p>'
            );
            if ($heading === null) {
                // Page layout differs from what is expected; a partial result
                // would be misleading, so report the page as unparsable.
                return null;
            }

            $description = $this->extractBetween($heading[1], '<p>', '</p>');
            if ($description === null) {
                return null;
            }

            $data[$name] = $this->countStars($heading[0]);
            $data[$name . '_desc'] = $description[0];
            $rest = $description[1];
        }

        return $data;
    }

    /**
     * Fetches and parses the page of every known constellation.
     *
     * @return array constellation name => parsed data as returned by
     *               parseHtml(), or null for a page that could not be
     *               fetched with status 200 or could not be parsed
     */
    public function getConstellations()
    {
        $data = [];

        foreach ($this->constellationPageIds as $name => $constellationPageId) {
            $data[$name] = $this->parseHtml($this->getHtml($constellationPageId));
        }

        return $data;
    }
}
|