review/app/Services/ConstellationCrawler.php

158 lines
4.5 KiB
PHP

<?php
namespace App\Services;
use GuzzleHttp\Client;
use Illuminate\Support\Carbon;
/**
 * Scrapes the per-sign daily pages of astro.click108.com.tw and extracts, for
 * each of the twelve signs, a star rating and a description for four sections
 * (keys: all, love, career, income).
 */
class ConstellationCrawler
{
    /** @var string Endpoint queried once per sign. */
    private $baseUrl = 'http://astro.click108.com.tw/daily_1.php';

    /** @var array<string,int> Sign name => value sent as the `iAstro` query parameter. */
    private $constellationPageIds = [
        'Aries' => 0,
        'Taurus' => 1,
        'Gemini' => 2,
        'Cancer' => 3,
        'Leo' => 4,
        'Virgo' => 5,
        'Libra' => 6,
        'Scorpio' => 7,
        'Sagittarius' => 8,
        'Capricorn' => 9,
        'Aquarius' => 10,
        'Pisces' => 11,
    ];

    /**
     * CSS class of each section's rating heading => key used in the parsed result.
     * Order matters: the page is consumed front to back in this order.
     *
     * @var array<string,string>
     */
    private $sections = [
        'txt_green' => 'all',
        'txt_pink' => 'love',
        'txt_blue' => 'career',
        'txt_orange' => 'income',
    ];

    /** @var Client|null Lazily created so one client serves all twelve requests. */
    private $httpClient;

    /**
     * Downloads the page for one sign, using today's date as `iAcDay`.
     *
     * @param int $pageId Value for the `iAstro` query parameter.
     * @return string Response body, or '' when the status is not 200.
     */
    private function getHtml($pageId = 0)
    {
        if ($this->httpClient === null) {
            $this->httpClient = new Client();
        }

        $url = $this->baseUrl . '?' . http_build_query([
            'iAstro' => $pageId,
            'iAcDay' => Carbon::now()->format('Y-m-d'),
        ]);

        // http_errors=false: Guzzle otherwise throws on 4xx/5xx, which would make
        // the "non-200 => empty string" fallback below unreachable for those codes.
        // Transport-level failures (DNS, refused connection, ...) still throw.
        $resp = $this->httpClient->request('GET', $url, ['http_errors' => false]);

        if ($resp->getStatusCode() !== 200) {
            return '';
        }

        return (string) $resp->getBody();
    }

    /**
     * Splits $html at the first occurrence of a literal marker.
     *
     * @param string $html
     * @param string $marker Literal text to search for (not a regex).
     * @return array|null [text before the marker, trimmed text after the marker],
     *                    or null when the marker does not occur.
     */
    private function splitAtMarker($html, $marker)
    {
        $pos = strpos($html, $marker);
        if ($pos === false) {
            return null;
        }

        return [
            (string) substr($html, 0, $pos),
            trim((string) substr($html, $pos + strlen($marker))),
        ];
    }

    /**
     * Counts the filled stars in the first consecutive run of "★" in $text.
     *
     * @param string $text
     * @return int Number of stars in that run; 0 when there is none.
     */
    private function countStars($text)
    {
        $run = [];
        if (preg_match('/★+/u', $text, $run) !== 1) {
            return 0;
        }

        // The run consists solely of "★", so counting occurrences equals its length
        // in characters without depending on the mbstring internal encoding.
        return substr_count($run[0], '★');
    }

    /**
     * Extracts the four rating/description pairs that follow the
     * `<div class="TODAY_CONTENT">` marker.
     *
     * Each section is expected as
     * `<p><span class="CLASS">...stars...</span></p><p>description</p>`.
     *
     * @param string $html Raw page HTML.
     * @return array|null Map with keys all, all_desc, love, love_desc, career,
     *                    career_desc, income, income_desc (in that order), or null
     *                    when the entry marker or any expected section marker is
     *                    missing.
     */
    private function parseHtml($html)
    {
        $entry = $this->splitAtMarker((string) $html, '<div class="TODAY_CONTENT">');
        if ($entry === null) {
            return null;
        }

        $remaining = $entry[1];
        $data = [];

        foreach ($this->sections as $cssClass => $name) {
            // Skip everything up to and including the coloured rating heading's opening tags.
            $heading = $this->splitAtMarker($remaining, '<p><span class="' . $cssClass . '">');
            if ($heading === null) {
                return null;
            }

            // Text up to the heading's closing tags holds the star rating.
            $rating = $this->splitAtMarker($heading[1], '</span></p>');
            if ($rating === null) {
                return null;
            }

            // The next paragraph is the section's description.
            $paragraph = $this->splitAtMarker($rating[1], '<p>');
            if ($paragraph === null) {
                return null;
            }

            $description = $this->splitAtMarker($paragraph[1], '</p>');
            if ($description === null) {
                return null;
            }

            $data[$name] = $this->countStars($rating[0]);
            $data[$name . '_desc'] = $description[0];
            $remaining = $description[1];
        }

        return $data;
    }

    /**
     * Fetches and parses the page of every sign.
     *
     * @return array<string,array|null> Sign name => parsed data, or null for a sign
     *                                  whose page could not be parsed.
     */
    public function getConstellations()
    {
        $data = [];
        foreach ($this->constellationPageIds as $name => $constellationPageId) {
            $data[$name] = $this->parseHtml($this->getHtml($constellationPageId));
        }

        return $data;
    }
}