400 lines
19 KiB
PHP
400 lines
19 KiB
PHP
<?php
|
||
|
||
namespace App\Services;
|
||
|
||
use App\Models\Live\LiveBanner;
|
||
use simplehtmldom\HtmlDocument;
|
||
use App\Models\CommunityActivity;
|
||
use App\Utils\Http;
|
||
|
||
// Disable PHP's script execution time limit ('0' means "no limit").
// NOTE(review): presumably needed because the scraping methods below sleep
// several seconds between HTTP requests; this runs as a side effect whenever
// this file is loaded, not only when a scrape is started.
ini_set('max_execution_time', '0');
|
||
|
||
/**
 * Scrapes activities published by a fixed set of organisers on hudongba.com
 * and huodongxing.com and stores new ones as CommunityActivity rows, each
 * with a matching LiveBanner row.
 *
 * The scraping methods sleep for a few random seconds before every HTTP
 * request, so a single call can run for minutes.
 */
class ScrapyService
{
    /**
     * hudongba.com organisers, keyed by mobile number.
     *
     * Each entry holds the organiser's hudongba user id plus the local
     * merchant id and anchor id that scraped activities are attached to.
     *
     * @var array
     */
    public static $huDongBaUserList = [
        "15915384578" => [ // Teacher Chengzi / Happy Women's Academy
            "huDongBaUserId" => 'un3es',
            "ufutx_merchant_id" => 729,
            "anchors_id" => 820,
        ],
        "18820255084" => [ // DTA Chuangwanjia / Zhang Tianle
            "huDongBaUserId" => '63bn',
            "ufutx_merchant_id" => 154,
            "anchors_id" => 258,
        ],
        "18312402612" => [ // Daling Zhi Lian: Kuang Xiongwei
            "huDongBaUserId" => '0r8bcu',
            "ufutx_merchant_id" => 812,
            "anchors_id" => 901,
        ],
        "15016731593" => [
            "huDongBaUserId" => 'z8jm',
            "ufutx_merchant_id" => 26,
            "anchors_id" => 120,
        ],
        "15002015539" => [ // Matchmaker Zhou
            "huDongBaUserId" => 'i8z5x',
            "ufutx_merchant_id" => 767,
            "anchors_id" => 856,
        ],
    ];

    /**
     * huodongxing.com organisers, keyed by mobile number.
     *
     * Same layout as $huDongBaUserList, with the huodongxing user id instead.
     *
     * @var array
     */
    public static $huoDongXingUserList = [
        "15871878634" => [ // Shenzhen Qule Activity Circle / Gong Dao
            "huoDongXingUserId" => '9062700541096',
            "ufutx_merchant_id" => 698,
            "anchors_id" => 789,
        ],
        "18520883213" => [ // Sanjing Academy
            "huoDongXingUserId" => '3532527772821',
            "ufutx_merchant_id" => 516,
            "anchors_id" => 617,
        ],
    ];

    /**
     * Scrape the hudongba.com profile pages of one organiser and save every
     * activity that is not stored yet.
     *
     * @param string $mobile   Key into self::$huDongBaUserList.
     * @param int    $maxPages Number of profile pages (pageNum=1..N) to walk.
     *
     * @return array Always an empty array; results are written to the database.
     */
    public function getHuDongBaScrapy($mobile, $maxPages = 5)
    {
        // Unknown organisers are ignored instead of raising an undefined-key warning.
        $val = isset(self::$huDongBaUserList[$mobile]) ? self::$huDongBaUserList[$mobile] : null;
        if (!$val) {
            return [];
        }

        $url_base = 'https://www.hudongba.com';
        $profile_url = $url_base . '/u/' . $val['huDongBaUserId'] . '.html';

        for ($pageNum = 1; $pageNum <= $maxPages; $pageNum++) {
            // Random pause before each listing request.
            sleep(rand(3, 6));
            $results = $this->getHuDongBaActiveDetails($profile_url . '?pageNum=' . $pageNum, $url_base, $val);
            if (is_array($results)) {
                foreach ($results as $result) {
                    $this->saveDataBase($result, $val);
                }
            }
        }

        return [];
    }

    /**
     * Parse one hudongba.com listing page into a list of activity arrays.
     *
     * Each activity may contain: img, details_url, title, details, address,
     * location (0 = online, 1 = offline), start_time, end_time, price.
     *
     * @param string $url      Listing page URL.
     * @param string $url_base Scheme and host used to rebuild detail URLs.
     * @param array  $val      Organiser entry (currently unused here).
     *
     * @return array|null Activities found, or null when the page could not be
     *                    loaded or contains no listing blocks.
     */
    private function getHuDongBaActiveDetails($url, $url_base, $val)
    {
        $html = new HtmlDocument();
        // loadFile() signals a failed download with false; find() must not be
        // called on a document that was never loaded.
        if ($html->loadFile($url) === false) {
            return null;
        }

        $rets = $html->find('div.timeline-main div.info-party-list');
        if (!$rets) {
            return null;
        }

        $active_list = [];
        foreach ($rets as $ret) {
            if (self::nodeClass($ret) !== 'info-party-list' || empty($ret->children)) {
                continue;
            }

            // Start from a clean record for every listing block.
            $active = [];
            foreach ($ret->children as $child) {
                $class = self::nodeClass($child);
                if ($child->tag == 'a' && $class === 'party-list-link') {
                    $img = $this->findHuDongBaCoverImage($child);
                    if ($img !== null) {
                        $active['img'] = $img;
                    }
                } elseif ($child->tag == 'div' && $class === 'info-mation') {
                    $active = $this->parseHuDongBaInfoBlock($child, $active, $url_base);
                }
            }

            if ($active) {
                $active_list[] = $active;
            }
        }

        return $active_list;
    }

    /**
     * Return the cover image URL of a hudongba listing link, if present.
     *
     * The image is expected as the first or second child of the link's
     * leading div.info-img element.
     *
     * @param object $link The a.party-list-link node.
     *
     * @return string|null
     */
    private function findHuDongBaCoverImage($link)
    {
        $wrapper = self::childAt($link, 0);
        if ($wrapper === null || $wrapper->tag != 'div' || self::nodeClass($wrapper) !== 'info-img') {
            return null;
        }

        foreach ([0, 1] as $index) {
            $img = self::childAt($wrapper, $index);
            if ($img !== null && $img->tag == 'img' && isset($img->attr['src'])) {
                return $img->attr['src'];
            }
        }

        return null;
    }

    /**
     * Read title, detail data and price from a hudongba div.info-mation node.
     *
     * @param object $info     The div.info-mation node.
     * @param array  $active   Activity record collected so far.
     * @param string $url_base Scheme and host used to rebuild detail URLs.
     *
     * @return array The enriched activity record.
     */
    private function parseHuDongBaInfoBlock($info, $active, $url_base)
    {
        foreach ($info->children as $node) {
            $class = self::nodeClass($node);

            if ($node->tag == 'a' && $class === 'party-list-link' && isset($node->attr['href'])) {
                $active['details_url'] = $node->attr['href'];

                $titleNode = self::childAt($node, 0);
                if ($titleNode !== null && $titleNode->tag == 'div' && self::nodeClass($titleNode) === 'info-title') {
                    // Kept untrimmed: the title is the duplicate-detection key in saveDataBase().
                    $active['title'] = $titleNode->plaintext;
                }

                $active = $this->fetchHuDongBaDetails($active, $url_base);
            } elseif ($node->tag == 'div' && $class === 'info-price') {
                $priceNode = self::childAt($node, 0);
                if ($priceNode !== null && $priceNode->tag == 'span' && self::nodeClass($priceNode) === 'n-price') {
                    $active['price'] = self::parseHuDongBaPrice($priceNode->plaintext);
                }
            }
        }

        return $active;
    }

    /**
     * Download a hudongba activity page and add description, address,
     * online/offline flag and start/end time to the record.
     *
     * @param array  $active   Activity record; must contain 'details_url'.
     * @param string $url_base Scheme and host used to rebuild the detail URL.
     *
     * @return array The enriched activity record.
     */
    private function fetchHuDongBaDetails($active, $url_base)
    {
        // Normalise the link to <base>/party/<id>.html when an id can be extracted.
        $activeId = self::extractHuDongBaActivityId($active['details_url']);
        if ($activeId !== null) {
            $active['details_url'] = $url_base . "/party" . "/" . $activeId . ".html";
        }

        sleep(rand(3, 9));
        $result = Http::get($active['details_url']);
        if (!is_string($result)) {
            $result = '';
        }

        // Description: everything from the detail marker up to the member anchor.
        $active['details'] = self::sliceBetween($result, '<!--活动详情-->', '<a name="member_anchor"></a>');

        if ($result !== '') {
            $address = $this->findHuDongBaAddress($result);
            if ($address !== null) {
                $active['address'] = $address;
            }
        }

        // Decided only after the address has been scraped, so both the title
        // and the address can mark an activity as online (0) or offline (1).
        $active['location'] = self::isOnlineHuDongBaActivity($active) ? 0 : 1;

        // Records without a description are never saved, so the extra
        // schedule request is skipped for them.
        if ($activeId !== null && $active['details'] !== '') {
            $date_url = "https://api.hudongba.com/ajax/api:110?infoId36=" . $activeId . "&type=1&_time=" . time();
            sleep(rand(3, 6));
            $res = Http::get($date_url);
            $decoded = is_string($res) ? json_decode($res, true) : null;
            if (isset($decoded['result']['frequencyList'][0]) && is_array($decoded['result']['frequencyList'][0])) {
                $first = $decoded['result']['frequencyList'][0];
                // NOTE(review): 'startDate' / 'endTime' are the keys the original
                // code read; confirm against the API that the asymmetry is intended.
                if (isset($first['startDate'])) {
                    $active['start_time'] = $first['startDate'];
                }
                if (isset($first['endTime'])) {
                    $active['end_time'] = $first['endTime'];
                }
            }
        }

        return $active;
    }

    /**
     * Extract the address text from a hudongba activity page.
     *
     * @param string $pageHtml Full HTML of the activity page.
     *
     * @return string|null Text of the last link found inside a detail_Attr
     *                     element, or null when none exists.
     */
    private function findHuDongBaAddress($pageHtml)
    {
        $doc = new HtmlDocument();
        $doc->load($pageHtml);
        $blocks = $doc->find('div.detail_time_attr_join div.detail_time_attr_join_gray');
        if (!$blocks) {
            return null;
        }

        $address = null;
        foreach ($blocks as $block) {
            if (empty($block->children)) {
                continue;
            }
            foreach ($block->children as $attrNode) {
                if (self::nodeClass($attrNode) !== 'detail_Attr') {
                    continue;
                }
                foreach ($attrNode->children as $linkNode) {
                    if ($linkNode->tag == 'a') {
                        $address = $linkNode->plaintext;
                    }
                }
            }
        }

        return $address;
    }

    /**
     * Store a scraped activity unless one with the same title already exists
     * for the same anchor and merchant.
     *
     * Records without a title or without a description are skipped.
     *
     * @param array $active       Scraped activity record.
     * @param array $merchantInfo Organiser entry with 'anchors_id' and 'ufutx_merchant_id'.
     *
     * @return void
     */
    public function saveDataBase($active, $merchantInfo)
    {
        if (!is_array($active) || !isset($active['title']) || $active['title'] === '' || empty($active['details'])) {
            return;
        }

        $existing = CommunityActivity::where("title", $active['title'])
            ->where('anchor_id', $merchantInfo['anchors_id'])
            ->where("merchant_id", $merchantInfo['ufutx_merchant_id'])
            ->first();
        if ($existing) {
            return;
        }

        $pic = isset($active['img']) ? $active['img'] : null;
        $startTime = isset($active['start_time']) ? $active['start_time'] : null;
        $endTime = isset($active['end_time']) ? $active['end_time'] : null;

        $communityActivity = new CommunityActivity();
        if (!empty($merchantInfo['anchors_id'])) {
            $communityActivity->anchor_id = $merchantInfo['anchors_id'];
        }
        $communityActivity->merchant_id = $merchantInfo['ufutx_merchant_id'];
        $communityActivity->title = $active['title'];
        $communityActivity->pv = 1;

        // Optional fields are only written when the scraper found them.
        foreach (['price', 'pay_type', 'location', 'address'] as $field) {
            if (isset($active[$field])) {
                $communityActivity->{$field} = $active[$field];
            }
        }

        $communityActivity->sort = 1;
        $communityActivity->start_time = $startTime;
        $communityActivity->end_time = $endTime;
        // The application deadline is set to the start of the activity.
        $communityActivity->apply_deadline = $startTime;
        $communityActivity->status = 0;
        $communityActivity->type = "business";
        $communityActivity->pic = $pic;
        $communityActivity->describe = $active['details'];
        $communityActivity->class = "one";
        $communityActivity->is_top = 0;
        $communityActivity->is_love_show = 0;
        $communityActivity->reward_status = 0;
        $communityActivity->insurance = 0;
        $communityActivity->save();

        $id = $communityActivity->id;

        // Banner for the new activity.
        $banner = LiveBanner::where('class', 'community')->where('class_id', $id)->first();
        if (!$banner) {
            $live_banner = new LiveBanner();
            $live_banner->icon = json_encode([$pic]);
            $live_banner->status = 1;
            $live_banner->class = "community";
            $live_banner->class_id = $id;
            $live_banner->save();
        }
    }

    /**
     * Scrape the huodongxing.com profile page of one organiser and save
     * every activity that is not stored yet.
     *
     * @param string $mobile Key into self::$huoDongXingUserList.
     *
     * @return array Always an empty array; results are written to the database.
     */
    public function getHuoDongXingScrapy($mobile)
    {
        // Unknown organisers are ignored instead of raising an undefined-key warning.
        $val = isset(self::$huoDongXingUserList[$mobile]) ? self::$huoDongXingUserList[$mobile] : null;
        if (!$val) {
            return [];
        }

        $url_details = 'https://gz.huodongxing.com';
        $url = $url_details . '/people/' . $val['huoDongXingUserId'] . '?type=pub';

        $results = $this->getHuoDongXingActiveDetails($url, $url_details, $val);
        if (is_array($results)) {
            foreach ($results as $result) {
                $this->saveDataBase($result, $val);
            }
        }

        return [];
    }

    /**
     * Parse a huodongxing.com profile page and each linked activity page.
     *
     * Each activity may contain: Url, img, title, address, logo, details,
     * price, pay_type, start_time, end_time, location (0 = online, 1 = offline).
     *
     * @param string $url          Profile page URL.
     * @param string $url_base     Scheme and host prepended to activity links.
     * @param array  $merchantInfo Organiser entry (currently unused here).
     *
     * @return array|null Activities keyed as returned by arrayUniqueness(), or
     *                    null when the page could not be loaded or lists nothing.
     */
    private function getHuoDongXingActiveDetails($url, $url_base, $merchantInfo)
    {
        $html = new HtmlDocument();
        if ($html->loadFile($url) === false) {
            return null;
        }

        $rets = $html->find('div#user-favorite-event li ul li');
        if (!$rets) {
            return null;
        }

        $active_list = [];
        foreach ($rets as $ret) {
            // Each item is parsed into a fresh record so no field can leak
            // from the previous item; items without a title are dropped.
            $active = $this->parseHuoDongXingListItem($ret);
            if (isset($active['title']) && $active['title'] !== '') {
                $active_list[] = $active;
            }
        }

        $active_list = $this->arrayUniqueness($active_list, "title");

        foreach ($active_list as $key => $active) {
            if (!isset($active['Url'])) {
                continue;
            }
            sleep(rand(6, 9));
            $result = Http::get($url_base . $active['Url']);
            $active_list[$key] = $this->enrichHuoDongXingActivity($active, is_string($result) ? $result : '');
        }

        return $active_list;
    }

    /**
     * Read link, image, title and address from one huodongxing list item.
     *
     * @param object $item The list item node.
     *
     * @return array Activity record; empty when the item has no link.
     */
    private function parseHuoDongXingListItem($item)
    {
        $active = [];
        if (empty($item->children)) {
            return $active;
        }

        foreach ($item->children as $link) {
            if ($link->tag != 'a') {
                continue;
            }
            if (isset($link->attr['href'])) {
                $active['Url'] = $link->attr['href'];
            }

            foreach ($link->children as $node) {
                if ($node->tag == 'img' && isset($node->attr['src'])) {
                    $active['img'] = $node->attr['src'];
                }
                if ($node->tag != 'div' || self::nodeClass($node) !== 'media-body') {
                    continue;
                }

                foreach ($node->children as $field) {
                    $first = self::childAt($field, 0);
                    if ($field->tag == "h3") {
                        // Kept untrimmed: the title is the duplicate-detection key in saveDataBase().
                        $active['title'] = $field->plaintext;
                    }
                    if ($field->tag == "div" && $first !== null && self::nodeClass($first) === 'icon-place') {
                        $active['address'] = $field->plaintext;
                    }
                    // NOTE(review): this mirrors the original check (an <img>
                    // whose first child has class "face"); an <img> normally has
                    // no children, so 'logo' is probably never set - confirm the
                    // intended markup. 'logo' is not read anywhere in this class.
                    if ($field->tag == "img" && $first !== null && self::nodeClass($first) === 'face' && isset($first->attr['src'])) {
                        $active['logo'] = $first->attr['src'];
                    }
                }
            }
        }

        return $active;
    }

    /**
     * Add description, price, schedule and location from a huodongxing
     * activity page to the record.
     *
     * Every section is cut out of the page independently, so a missing
     * section can no longer be parsed from another section's leftovers.
     *
     * @param array  $active Activity record from the profile page.
     * @param string $page   Full HTML of the activity page ('' on failure).
     *
     * @return array The enriched activity record.
     */
    private function enrichHuoDongXingActivity($active, $page)
    {
        // Description block.
        $active['details'] = self::sliceBetween($page, '<div class=" " id="event_desc_page">', '<div class="clear">');

        // Price: inline ticket JSON that precedes "var eventModelId".
        $json = self::sliceBetween($page, '[{"SN":', 'var eventModelId', true);
        if ($json !== '') {
            $json = rtrim(str_replace(";\r\n\t", "", $json), "; \t\r\n");
            $tickets = json_decode($json, true);
            if (is_array($tickets) && isset($tickets[0]['Price'])) {
                $active['price'] = $tickets[0]['Price'];
                // Paid activities use wechat, everything else is free.
                $active['pay_type'] = $active['price'] > 0 ? 'wechat' : 'free';
            }
        }

        // Schedule: "<start> ~ <end>" inside the address-info wrapper.
        $timeHtml = self::sliceBetween(
            $page,
            '<div class="address-info-wrap">',
            '<div class="address" style="padding-left: 0px;" title="活动地点">'
        );
        if ($timeHtml !== '') {
            $timeDoc = new HtmlDocument();
            $timeDoc->load($timeHtml);
            $timeNodes = $timeDoc->find('div.address-info-wrap div');
            if ($timeNodes) {
                $parts = explode(" ~ ", (string) $timeNodes[0]->plaintext);
                if (count($parts) >= 2) {
                    $start = self::formatHuoDongXingTime($parts[0]);
                    $end = self::formatHuoDongXingTime($parts[1]);
                    if ($start !== null && $end !== null) {
                        $active['start_time'] = $start;
                        $active['end_time'] = $end;
                    }
                }
            }
        }

        // Venue and online (0) / offline (1) flag; offline is the default.
        $active['location'] = 1;
        $addressHtml = self::sliceBetween(
            $page,
            '<div class="address" style="padding-left: 0px;" title="活动地点">',
            '<div title="活动发起人">'
        );
        if ($addressHtml !== '') {
            $addressDoc = new HtmlDocument();
            $addressDoc->load($addressHtml);
            $addressNodes = $addressDoc->find('span.link-a-hover');
            if ($addressNodes) {
                $active['address'] = $addressNodes[0]->plaintext;
                if (trim((string) $active['address']) === '线上活动') {
                    $active['location'] = 0;
                }
            }
        }

        return $active;
    }

    /**
     * De-duplicate a list of arrays by one field, keeping the first
     * occurrence of every value.
     *
     * Items that lack the field share the '' key.
     *
     * @param array  $arr List of associative arrays.
     * @param string $key Field to de-duplicate on.
     *
     * @return array Unique items keyed by the field value, in reverse order
     *               of first appearance (integer-like keys are renumbered by
     *               array_reverse()).
     */
    private function arrayUniqueness($arr, $key)
    {
        $unique = [];
        foreach ($arr as $value) {
            $id = isset($value[$key]) ? $value[$key] : '';
            if (!isset($unique[$id])) {
                $unique[$id] = $value;
            }
        }

        return array_reverse($unique);
    }

    /**
     * Cut the part of $haystack that starts at $startMarker and ends right
     * before $endMarker.
     *
     * Positions are compared with false explicitly, so a marker at offset 0
     * counts as found.
     *
     * @param string $haystack    Text to search.
     * @param string $startMarker First text to keep (included in the result).
     * @param string $endMarker   Text at which to stop (excluded).
     * @param bool   $requireEnd  When true, '' is returned if $endMarker is missing;
     *                            otherwise the rest of the text is returned.
     *
     * @return string '' when $startMarker is not found.
     */
    private static function sliceBetween($haystack, $startMarker, $endMarker, $requireEnd = false)
    {
        $haystack = (string) $haystack;
        $start = strpos($haystack, $startMarker);
        if ($start === false) {
            return '';
        }

        $slice = substr($haystack, $start);
        // Search behind the start marker so the two markers cannot overlap.
        $end = strpos($slice, $endMarker, strlen($startMarker));
        if ($end === false) {
            return $requireEnd ? '' : $slice;
        }

        return substr($slice, 0, $end);
    }

    /**
     * Return the class attribute of a DOM node, or null when it has none.
     *
     * @param object $node
     *
     * @return string|null
     */
    private static function nodeClass($node)
    {
        return isset($node->attr['class']) ? $node->attr['class'] : null;
    }

    /**
     * Return the child of a DOM node at the given index, or null.
     *
     * @param object $node
     * @param int    $index
     *
     * @return object|null
     */
    private static function childAt($node, $index)
    {
        return isset($node->children[$index]) ? $node->children[$index] : null;
    }

    /**
     * Extract the activity id from a hudongba link such as "/party/<id>.html".
     *
     * @param string $url Absolute or host-relative URL.
     *
     * @return string|null The third path segment without ".html", or null when
     *                     the URL has no such segment.
     */
    private static function extractHuDongBaActivityId($url)
    {
        $parts = parse_url((string) $url);
        if (!$parts || empty($parts['path'])) {
            return null;
        }

        $segments = explode("/", $parts['path']);
        if (!isset($segments[2]) || $segments[2] === '') {
            return null;
        }

        return str_replace(".html", "", $segments[2]);
    }

    /**
     * Convert a hudongba price label into a number.
     *
     * The "free" label becomes 0.00; otherwise the currency sign and the
     * "from" suffix are removed and the rest is read as a float so that
     * fractional prices are not truncated.
     *
     * @param string $text Price label as shown on the listing.
     *
     * @return float
     */
    private static function parseHuDongBaPrice($text)
    {
        $text = trim((string) $text);
        if ($text === '免费') {
            return 0.00;
        }

        return (float) str_replace(['¥', '¥', '起'], '', $text);
    }

    /**
     * Decide whether a hudongba activity takes place online.
     *
     * @param array $active Activity record with optional 'title' and 'address'.
     *
     * @return bool True when the title contains the "online" keyword or the
     *              address contains the "online activity" label.
     */
    private static function isOnlineHuDongBaActivity($active)
    {
        $title = isset($active['title']) ? (string) $active['title'] : '';
        $address = isset($active['address']) ? (string) $active['address'] : '';

        // Compare with false: a keyword at the very start has position 0.
        return mb_strpos($title, "线上") !== false || mb_strpos($address, "线上活动") !== false;
    }

    /**
     * Convert a huodongxing timestamp ("Y.m.d H:i:s") to "Y-m-d H:i:s".
     *
     * @param string $text Timestamp text; surrounding whitespace is ignored.
     *
     * @return string|null Zero-padded date-time, or null when no date could be
     *                     parsed. Missing time parts are treated as 0.
     */
    private static function formatHuoDongXingTime($text)
    {
        $parsed = date_parse_from_format('Y.m.d H:i:s', trim((string) $text));
        if ($parsed['year'] === false || $parsed['month'] === false || $parsed['day'] === false) {
            return null;
        }

        return sprintf(
            '%04d-%02d-%02d %02d:%02d:%02d',
            $parsed['year'],
            $parsed['month'],
            $parsed['day'],
            (int) $parsed['hour'],
            (int) $parsed['minute'],
            (int) $parsed['second']
        );
    }
}
|
||
|
||
|