love_php/app/Services/ScrapyService.php
2026-04-02 09:20:51 +08:00

400 lines
19 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Services;
use App\Models\Live\LiveBanner;
use simplehtmldom\HtmlDocument;
use App\Models\CommunityActivity;
use App\Utils\Http;
// Lift PHP's execution time limit: the scrapers in this class deliberately sleep for several
// seconds between HTTP requests, so a full run takes a long time.
// NOTE(review): this executes as a side effect whenever the file is loaded, not only when a
// scrape is started — confirm that is intended.
ini_set('max_execution_time', '0');
class ScrapyService
{
/**
 * HuDongBa organiser accounts that can be scraped, keyed by the mobile number that
 * getHuDongBaScrapy() receives.
 *
 * Fields of each entry:
 *  - huDongBaUserId:    id inserted into the organiser page URL (www.hudongba.com/u/{id}.html)
 *  - ufutx_merchant_id: stored as merchant_id on the imported activities
 *  - anchors_id:        stored as anchor_id on the imported activities
 */
public static $huDongBaUserList = [
"15915384578" => [ // Teacher Chengzi / Happiness Women's Academy
"huDongBaUserId" => 'un3es',
"ufutx_merchant_id" => 729,
"anchors_id" => 820,
],
"18820255084" => [ // DTA Chuang Wanjia / Zhang Tianle
"huDongBaUserId" => '63bn',
"ufutx_merchant_id" => 154,
"anchors_id" => 258,
],
"18312402612" => [ // Daling Zhi Lian (mature-age romance): Kuang Xiongwei
"huDongBaUserId" => '0r8bcu',
"ufutx_merchant_id" => 812,
"anchors_id" => 901,
],
"15016731593" =>[
"huDongBaUserId" => 'z8jm',
"ufutx_merchant_id" => 26,
"anchors_id" => 120,
],
"15002015539" =>[ // Matchmaker Zhou
"huDongBaUserId" => 'i8z5x',
"ufutx_merchant_id" => 767,
"anchors_id" => 856,
],
];
/**
 * HuoDongXing organiser accounts that can be scraped, keyed by the mobile number that
 * getHuoDongXingScrapy() receives.
 *
 * Fields of each entry:
 *  - huoDongXingUserId: id inserted into the organiser page URL (gz.huodongxing.com/people/{id})
 *  - ufutx_merchant_id: stored as merchant_id on the imported activities
 *  - anchors_id:        stored as anchor_id on the imported activities
 */
public static $huoDongXingUserList = [ // Shenzhen Qule activity circle / Gong Dao — presumably labels the first entry below
"15871878634" => [
"huoDongXingUserId" => '9062700541096',
"ufutx_merchant_id" => 698,
"anchors_id" => 789,
],
"18520883213" => [ // Sanjing Academy
"huoDongXingUserId" => '3532527772821',
"ufutx_merchant_id" => 516,
"anchors_id" => 617,
]
];
/**
 * Scrape the first five listing pages of a HuDongBa organiser and store every activity found.
 *
 * @param string $mobile Key into self::$huDongBaUserList.
 * @return array Always an empty array; scraped activities are persisted through saveDataBase().
 */
public function getHuDongBaScrapy($mobile)
{
    // Look the organiser up with a default so an unregistered mobile reaches the guard below
    // instead of raising an "undefined array key" error on the lookup itself.
    $val = self::$huDongBaUserList[$mobile] ?? null;
    if (!$val) {
        return [];
    }

    $url_base = 'https://www.hudongba.com';
    $list_url = $url_base . '/u/' . $val['huDongBaUserId'] . '.html?pageNum=';

    for ($pageNum = 1; $pageNum <= 5; $pageNum++) {
        // Random pause before every listing request.
        sleep(rand(3, 6));
        $results = $this->getHuDongBaActiveDetails($list_url . $pageNum, $url_base, $val);
        if (!is_array($results)) {
            // null means the page contained no activity cards.
            continue;
        }
        foreach ($results as $result) {
            $this->saveDataBase($result, $val);
        }
    }

    return [];
}
/**
 * Scrape one HuDongBa organiser listing page and enrich each activity card with data from
 * its detail page and from the schedule API.
 *
 * Each returned element is an array that may contain: img, details_url, title, details,
 * address, location (0 = online, 1 = offline), start_time, end_time and price.
 *
 * @param string $url      Listing page URL.
 * @param string $url_base Scheme and host used to rebuild the canonical detail page URL.
 * @param array  $val      Organiser entry; not used by this method.
 * @return array|null List of activity arrays, or null when the page has no activity cards.
 */
private function getHuDongBaActiveDetails($url, $url_base, $val)
{
    $html = new HtmlDocument();
    $html->loadFile($url);
    $rets = $html->find('div.timeline-main div.info-party-list');
    if (!$rets) {
        return null;
    }

    $active_list = [];
    foreach ($rets as $ret) {
        if (($ret->attr['class'] ?? null) !== 'info-party-list' || !isset($ret->children)) {
            continue;
        }

        // Start every card from an empty record so nothing leaks between cards.
        $active = [];
        foreach ($ret->children as $card_child) {
            $card_class = $card_child->attr['class'] ?? null;

            if ($card_child->tag === 'a' && $card_class === 'party-list-link') {
                $img = $this->huDongBaCardImage($card_child);
                if ($img !== null) {
                    $active['img'] = $img;
                }
            } elseif ($card_child->tag === 'div' && $card_class === 'info-mation') {
                foreach ($card_child->children as $info_child) {
                    $info_class = $info_child->attr['class'] ?? null;
                    $first = $info_child->children[0] ?? null;

                    if ($info_child->tag === 'a' && $info_class === 'party-list-link' && isset($info_child->attr['href'])) {
                        $active['details_url'] = $info_child->attr['href'];
                        if ($first && $first->tag === 'div' && ($first->attr['class'] ?? null) === 'info-title') {
                            $active['title'] = $first->plaintext;
                        }
                        $active = $this->huDongBaLoadDetail($active, $url_base);
                    } elseif ($info_child->tag === 'div' && $info_class === 'info-price') {
                        if ($first && $first->tag === 'span' && ($first->attr['class'] ?? null) === 'n-price') {
                            $active['price'] = $this->huDongBaParsePrice($first->plaintext);
                        }
                    }
                }
            }
        }

        // A card from which nothing could be extracted is dropped rather than appended.
        if ($active) {
            $active_list[] = $active;
        }
    }

    return $active_list;
}

/**
 * Return the cover image URL of a listing card link, or null when there is none.
 * The image is looked for in the first two children of the leading div.info-img.
 */
private function huDongBaCardImage($link)
{
    $wrapper = $link->children[0] ?? null;
    if (!$wrapper || $wrapper->tag !== 'div' || ($wrapper->attr['class'] ?? null) !== 'info-img') {
        return null;
    }
    foreach ([0, 1] as $index) {
        $img = $wrapper->children[$index] ?? null;
        if ($img && $img->tag === 'img' && isset($img->attr['src'])) {
            return $img->attr['src'];
        }
    }
    return null;
}

/**
 * Fetch the detail page (and schedule API) for a card and add details, address, location,
 * start_time and end_time to the record. Expects $active['details_url'] to be set.
 */
private function huDongBaLoadDetail(array $active, $url_base)
{
    // Rebuild the canonical "/party/{id}.html" URL from the third path segment of the link.
    $active_id = null;
    $path = parse_url($active['details_url'], PHP_URL_PATH);
    if (!empty($path)) {
        $segments = explode("/", $path);
        if (isset($segments[2])) {
            $active_id = str_replace(".html", "", $segments[2]);
            $active['details_url'] = $url_base . "/party" . "/" . $active_id . ".html";
        }
    }

    sleep(rand(3, 9));
    $result = Http::get($active['details_url']);
    if (!is_string($result)) {
        // Treat a failed request as an empty page so the string handling below stays safe.
        $result = '';
    }

    // Keep only the part between the "activity details" comment and the member anchor.
    $details = "";
    $from = strpos($result, '<!--活动详情-->');
    if ($from !== false) {
        $details = substr($result, $from);
        $to = strpos($details, '<a name="member_anchor"></a>');
        if ($to !== false) {
            $details = substr($details, 0, $to);
        }
    }
    $active['details'] = $details;

    // Address: text of the link(s) inside the detail_Attr row; the last one found wins.
    if ($result !== '') {
        $details_html = new HtmlDocument();
        $details_html->load($result);
        $boxes = $details_html->find('div.detail_time_attr_join div.detail_time_attr_join_gray');
        foreach (($boxes ?: []) as $box) {
            foreach ($box->children as $row) {
                if (($row->attr['class'] ?? null) !== 'detail_Attr') {
                    continue;
                }
                foreach ($row->children as $cell) {
                    if ($cell->tag === 'a') {
                        $active['address'] = $cell->plaintext;
                    }
                }
            }
        }
    }

    // Online = 0, offline = 1. Decided only after the address is known, and with an explicit
    // "!== false" so a match at offset 0 (title starting with the keyword) still counts.
    $title = (string)($active['title'] ?? '');
    $address = (string)($active['address'] ?? '');
    $is_online = mb_strpos($title, "线上") !== false || mb_strpos($address, "线上活动") !== false;
    $active['location'] = $is_online ? 0 : 1;

    // Schedule comes from the API, addressed by the same activity id.
    if ($active_id !== null) {
        $date_url = "https://api.hudongba.com/ajax/api:110?infoId36=" . $active_id . "&type=1&_time=" . time();
        sleep(rand(3, 6));
        $res = Http::get($date_url);
        if (is_string($res) && $res !== '') {
            $res = json_decode($res, true);
            $slot = $res['result']['frequencyList'][0] ?? [];
            // NOTE(review): the keys are startDate / endTime exactly as in the original code —
            // confirm against the API response that the mixed naming is intentional.
            $active['start_time'] = $slot['startDate'] ?? null;
            $active['end_time'] = $slot['endTime'] ?? null;
        }
    }

    return $active;
}

/**
 * Convert a price label into a number: the "free" label gives 0.00, otherwise the first run
 * of digits is used so any currency symbol or surrounding text is ignored (0 when no digits).
 */
private function huDongBaParsePrice($label)
{
    $label = trim((string)$label);
    if ($label === '免费') {
        return 0.00;
    }
    // NOTE(review): the result is truncated to an integer as before — confirm whether
    // fractional prices need to be preserved.
    return preg_match('/\d+/', $label, $match) ? (int)$match[0] : 0;
}
/**
 * Persist one scraped activity as a CommunityActivity plus its LiveBanner.
 *
 * Nothing is written when the record has no title, has empty details, or an activity with
 * the same title already exists for the given anchor and merchant.
 *
 * @param array|null $active       Scraped activity (title, details, img, price, pay_type,
 *                                 location, start_time, end_time, address).
 * @param array      $merchantInfo Organiser entry providing anchors_id and ufutx_merchant_id.
 * @return void
 */
public function saveDataBase($active, $merchantInfo)
{
    // Partial scrapes can lack a title or details; reject them before querying so missing
    // keys never raise undefined-index errors further down.
    if (!is_array($active) || !isset($active['title']) || empty($active['details'])) {
        return;
    }

    $anchor_id = $merchantInfo['anchors_id'] ?? null;
    $merchant_id = $merchantInfo['ufutx_merchant_id'] ?? null;

    $existing = CommunityActivity::where("title", $active['title'])->where('anchor_id', $anchor_id)
        ->where("merchant_id", $merchant_id)->first();
    if ($existing) {
        return;
    }

    $start_time = $active['start_time'] ?? null;
    $img = $active['img'] ?? null;

    $communityActivity = new CommunityActivity();
    if (!empty($anchor_id)) {
        $communityActivity->anchor_id = $anchor_id;
    }
    $communityActivity->merchant_id = $merchant_id;
    $communityActivity->title = $active['title'];
    $communityActivity->pv = 1;
    // Optional fields are only assigned when the scraper produced them.
    if (isset($active['price'])) {
        $communityActivity->price = $active['price'];
    }
    if (isset($active['pay_type'])) {
        $communityActivity->pay_type = $active['pay_type'];
    }
    if (isset($active['location'])) {
        $communityActivity->location = $active['location'];
    }
    $communityActivity->sort = 1;
    $communityActivity->start_time = $start_time;
    $communityActivity->end_time = $active['end_time'] ?? null;
    // The application deadline is set to the start time.
    $communityActivity->apply_deadline = $start_time;
    $communityActivity->status = 0;
    $communityActivity->type = "business";
    $communityActivity->pic = $img;
    $communityActivity->describe = $active['details'];
    $communityActivity->is_top = 0;
    $communityActivity->class = "one";
    $communityActivity->is_love_show = 0;
    $communityActivity->reward_status = 0;
    if (isset($active['address'])) {
        $communityActivity->address = $active['address'];
    }
    $communityActivity->insurance = 0;
    $communityActivity->save();
    $id = $communityActivity->id;

    // Create the banner for the new activity unless one already exists for its id.
    $banner = LiveBanner::where('class', 'community')->where('class_id', $id)->first();
    if (!$banner) {
        $live_banner = new LiveBanner();
        $live_banner->icon = json_encode([$img]);
        $live_banner->status = 1;
        $live_banner->class = "community";
        $live_banner->class_id = $id;
        $live_banner->save();
    }
}
/**
 * Scrape the published-events page of a HuoDongXing organiser and store every activity found.
 *
 * @param string $mobile Key into self::$huoDongXingUserList.
 * @return array Always an empty array; scraped activities are persisted through saveDataBase().
 */
public function getHuoDongXingScrapy($mobile)
{
    // Look the organiser up with a default so an unregistered mobile reaches the guard below
    // instead of raising an "undefined array key" error on the lookup itself.
    $val = self::$huoDongXingUserList[$mobile] ?? null;
    if (!$val) {
        return [];
    }

    $url_details = 'https://gz.huodongxing.com';
    $url = $url_details . '/people/' . $val['huoDongXingUserId'] . '?type=pub';

    $results = $this->getHuoDongXingActiveDetails($url, $url_details, $val);
    if (is_array($results)) {
        foreach ($results as $result) {
            $this->saveDataBase($result, $val);
        }
    }

    return [];
}
/**
 * Scrape a HuoDongXing organiser page and enrich each listed event from its detail page.
 *
 * The listing yields Url, img, title, address and logo. After de-duplication by title, the
 * detail page adds details, price, pay_type, start_time, end_time, address and location
 * (0 = online, 1 = offline).
 *
 * @param string $url          Organiser listing URL.
 * @param string $url_base     Scheme and host prepended to each event's relative Url.
 * @param array  $merchantInfo Organiser entry; not used by this method.
 * @return array|null Events keyed as returned by arrayUniqueness(), or null when the listing
 *                    contains no event items.
 */
private function getHuoDongXingActiveDetails($url, $url_base, $merchantInfo)
{
    $html = new HtmlDocument();
    $html->loadFile($url);
    $rets = $html->find('div#user-favorite-event li ul li');
    if (!$rets) {
        return null;
    }

    $active_list = [];
    foreach ($rets as $ret) {
        // Reset per item so fields of the previous event cannot leak into this one.
        $active = [];
        foreach ($ret->children as $link) {
            if ($link->tag !== 'a') {
                continue;
            }
            if (isset($link->attr['href'])) {
                $active['Url'] = $link->attr['href'];
            }
            foreach ($link->children as $node) {
                if ($node->tag === 'img') {
                    $active['img'] = $node->attr['src'] ?? null;
                }
                if ($node->tag === 'div' && ($node->attr['class'] ?? null) === 'media-body') {
                    foreach ($node->children as $part) {
                        $inner = $part->children[0] ?? null;
                        $inner_class = $inner ? ($inner->attr['class'] ?? null) : null;
                        if ($part->tag === "h3") {
                            $active['title'] = $part->plaintext;
                        }
                        if ($part->tag === "div" && $inner_class === 'icon-place') {
                            $active['address'] = $part->plaintext;
                        }
                        if ($part->tag === "img" && $inner_class === 'face') {
                            $active['logo'] = $inner->attr['src'] ?? null;
                        }
                    }
                }
            }
        }
        // Events are de-duplicated and stored by title, so an item without one is unusable.
        if (isset($active['title'])) {
            $active_list[] = $active;
        }
    }

    $active_list = $this->arrayUniqueness($active_list, "title");

    foreach ($active_list as $key => $active) {
        if (!isset($active['Url'])) {
            continue;
        }
        sleep(rand(6, 9));
        $result = Http::get($url_base . $active['Url']);
        if (!is_string($result)) {
            // Treat a failed request as an empty page so the string handling below stays safe.
            $result = '';
        }

        // Every section below slices its own fragment; a missing marker yields '' instead of
        // silently reusing the fragment of the previous section.

        // Description block.
        $active_list[$key]['details'] = $this->huoDongXingSlice(
            $result,
            '<div class=" " id="event_desc_page">',
            '<div class="clear">'
        );

        // Price: first ticket of the JSON array embedded in the page script.
        $tickets_json = $this->huoDongXingSlice($result, '[{"SN":', 'var eventModelId');
        $tickets = json_decode(rtrim($tickets_json, " \t\r\n;"), true);
        if (is_array($tickets) && isset($tickets[0]) && is_array($tickets[0])) {
            $price = $tickets[0]['Price'] ?? null;
            $active_list[$key]['price'] = $price;
            // Paid events use 'wechat', everything else is 'free'.
            $active_list[$key]['pay_type'] = $price > 0 ? 'wechat' : 'free';
        }

        // Start / end time.
        $time_html = $this->huoDongXingSlice(
            $result,
            '<div class="address-info-wrap">',
            '<div class="address" style="padding-left: 0px;" title="活动地点">'
        );
        if ($time_html !== '') {
            $time_doc = new HtmlDocument();
            $time_doc->load($time_html);
            $time_nodes = $time_doc->find('div.address-info-wrap div');
            $time = isset($time_nodes[0]) ? $time_nodes[0]->plaintext : '';
            if ($time) {
                // NOTE(review): the separator is a plain space as in the original code, which
                // also occurs inside the 'Y.m.d H:i:s' format — confirm against the live page
                // that this is the character separating start from end.
                $pieces = explode(" ", $time);
                $start_time = $pieces[0];
                $end_time = $pieces[1] ?? null;
                if ($start_time && $end_time) {
                    $active_list[$key]['start_time'] = $this->huoDongXingFormatTime($start_time);
                    $active_list[$key]['end_time'] = $this->huoDongXingFormatTime($end_time);
                }
            }
        }

        // Venue and online/offline flag.
        $place_html = $this->huoDongXingSlice(
            $result,
            '<div class="address" style="padding-left: 0px;" title="活动地点">',
            '<div title="活动发起人">'
        );
        $place_nodes = [];
        if ($place_html !== '') {
            $place_doc = new HtmlDocument();
            $place_doc->load($place_html);
            $place_nodes = $place_doc->find('span.link-a-hover');
        }
        if (isset($place_nodes[0])) {
            $active_list[$key]['address'] = $place_nodes[0]->plaintext;
            $active_list[$key]['location'] = $active_list[$key]['address'] == '线上活动' ? 0 : 1;
        } else {
            // No venue found: default to offline.
            $active_list[$key]['location'] = 1;
        }
    }

    return $active_list;
}

/**
 * Return the part of $html that starts at $start and stops just before $end.
 * Gives '' when $start is absent; runs to the end of $html when $end is absent.
 */
private function huoDongXingSlice($html, $start, $end)
{
    $from = strpos($html, $start);
    if ($from === false) {
        return '';
    }
    $chunk = substr($html, $from);
    $to = strpos($chunk, $end);
    return $to === false ? $chunk : substr($chunk, 0, $to);
}

/**
 * Parse a 'Y.m.d H:i:s' string and re-join its parts as "year-month-day hour:minute:second"
 * (parts are not zero-padded; parts that fail to parse become empty).
 */
private function huoDongXingFormatTime($text)
{
    $parts = date_parse_from_format('Y.m.d H:i:s', $text);
    return $parts['year'] . '-' . $parts['month'] . "-" . $parts['day'] . " "
        . $parts['hour'] . ":" . $parts['minute'] . ":" . $parts['second'];
}
/**
 * Remove rows whose $key value has already been seen, keeping the first occurrence.
 *
 * Rows that are not arrays, or whose $key value is missing or not usable as an array key,
 * are skipped. The result is indexed by the $key value and returned in reverse order of
 * first appearance.
 *
 * @param array      $arr List of associative arrays.
 * @param string|int $key Field to de-duplicate on.
 * @return array
 */
private function arrayUniqueness($arr, $key)
{
    $res = [];
    foreach ($arr as $value) {
        // Skip rows that cannot provide a usable de-duplication key instead of raising an
        // undefined-index error and filing them under an empty key.
        if (!is_array($value) || !isset($value[$key])) {
            continue;
        }
        $id = $value[$key];
        if (!is_string($id) && !is_int($id)) {
            continue;
        }
        // First occurrence wins; later duplicates are ignored.
        if (!isset($res[$id])) {
            $res[$id] = $value;
        }
    }
    return array_reverse($res);
}
}