3. 整个过程(包括重建数据库表结构、解析 JSON 等)如同逆向运营商的数据库一般。总体来说,三大运营商的更新频率不算频繁,还算较稳定,数据结构、网页结构等都不会做很大的变动。
/*
#############################################################################################
# TongXinZhenXin Solution
# Telecom credit-data crawling solution
# ========================================================================================= #
# File: CrawlerUNC.cs  China Unicom crawler class
# ========================================================================================= #
# Credits: 0ng ching tong
# Thanks: ..
# Time: 2016-05-12 17:10:09
# Update: most recent China Unicom packet-capture analysis: 2016-05-12 17:15:32
# Original article: http://www.cnblogs.com/x-poior/p/5641437.html
#############################################################################################
*/

using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using YXRepository.Log;
using YXRepository.Model;

namespace Crawler {
    /// <summary>
    /// Crawler for the China Unicom (10010.com) self-service site: login, customer
    /// information, monthly bill summaries and call-detail records.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this file was reconstructed from a text extract in which call
    /// parentheses and generic type arguments had been stripped. The generic arguments
    /// (<c>IList&lt;string&gt;</c>, <c>IList&lt;TXZhangDanModel&gt;</c>, ...) and the
    /// method-vs-property shape of external members such as
    /// <c>TimeStamp.GetTimeStamp_13()</c> are inferred from usage - confirm against
    /// <c>ICrawler</c>, <c>CrawlerBase</c> and <c>Crawler.Common</c>.
    /// </remarks>
    public class CrawlerUNC : CrawlerBase, ICrawler {

        /// <summary>Page size used when paging through call-detail records.</summary>
        private const int CallDetailPageSize = 50;

        /// <summary>Upper bound on retries when the server reports a transient failure.</summary>
        private const int MaxRetryCount = 10;

        /// <summary>Delay between two retries, in milliseconds.</summary>
        private const int RetryDelayMilliseconds = 500;

        /// <summary>Number of months (current month included) for which bills/details are fetched.</summary>
        private const int MonthsToQuery = 5;

        // The patterns below are applied to every response page; building them once
        // avoids re-parsing the pattern on each request.
        private static readonly Regex BillCycleRegex = new Regex(@"""billcycle""(:)("".*?"")");
        private static readonly Regex CallDateRegex = new Regex(@"""calldate""(:)("".*?"")");
        private static readonly Regex CallTimeRegex = new Regex(@"""calltime""(:)("".*?"")");
        private static readonly Regex HomeAreaRegex = new Regex(@"""homeareaName""(:)("".*?"")");
        private static readonly Regex CallTypeRegex = new Regex(@"""calltypeName""(:)("".*?"")");
        private static readonly Regex OtherNumRegex = new Regex(@"""othernum""(:)("".*?"")");
        private static readonly Regex CallLongHourRegex = new Regex(@"""calllonghour""(:)("".*?"")");
        private static readonly Regex LandTypeRegex = new Regex(@"""landtype""(:)("".*?"")");
        private static readonly Regex TotalFeeRegex = new Regex(@"""totalfee""(:)("".*?"")");

        private HttpHelperNew hhn;

        // Initialized here so that the static CollectJsonLog cannot hit a null list
        // when it is called before any instance has been constructed.
        // NOTE(review): the list is static and is replaced by every constructor call,
        // so concurrent crawler instances share (and reset) one log.
        private static IList<string> loglist = new List<string>();

        /// <summary>
        /// The "uvc" verification key appended to the login request, read from
        /// <c>HttpHelperNew.UNCuacverifykey</c>.
        /// </summary>
        private string currentUVC {
            get {
                return HttpHelperNew.UNCuacverifykey;
            }
        }

        /// <summary>
        /// China Unicom: stores the credentials and initializes the login-related URLs.
        /// </summary>
        /// <param name="number">Phone number used as the login name.</param>
        /// <param name="pwd">Service password of the phone number.</param>
        public CrawlerUNC(string number, string pwd) {
            hhn = new HttpHelperNew();
            loglist = new List<string>();

            currentPhoneNumber = number;
            currentPhoneServicePwd = pwd;

            loginIsNeedVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CheckNeedVerify";
            loginVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CreateImage";
            loginToVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CtaIdyChk";
        }

        /// <summary>
        /// Asks the server whether the login of the current phone number requires an image code.
        /// </summary>
        /// <returns>
        /// <c>false</c> only when the response contains <c>"resultCode":"false"</c>;
        /// <c>true</c> for every other response.
        /// </returns>
        public bool IsLoginNeedVerify() {
            string url = loginIsNeedVerifyImgRequestUrl;
            string queryData = "callback=jQuery17205245009011952871_" + TimeStamp.GetTimeStamp_13()
                + "&userName=" + currentPhoneNumber + "&pwdType=01&_=" + TimeStamp.GetTimeStamp_13();
            string retString = hhn.HttpGet(url, queryData, HttpForType.联通);
            CollectJsonLog(url, queryData, 0, retString);

            return !retString.Contains(@"""resultCode"":""false""");
        }

        /// <summary>
        /// Submits the image code to the verification endpoint and remembers it for the
        /// subsequent <see cref="Login"/> call.
        /// </summary>
        /// <param name="imgcode">Image code typed by the user.</param>
        /// <returns><c>true</c> when the response contains <c>"resultCode":"true"</c>.</returns>
        public bool IsLoginImgVerifyOk(string imgcode) {
            currentLoginImgCode = imgcode;

            string url = loginToVerifyImgRequestUrl;
            string queryData = "callback=jQuery17208163765012834383_1463034583178&verifyCode=" + currentLoginImgCode + "&verifyType=1&_=1463034805373";
            string retString = hhn.HttpGet(url, queryData);
            CollectJsonLog(url, queryData, 0, retString);

            return retString.Contains(@"""resultCode"":""true""");
        }

        /// <summary>
        /// Downloads the login image code and returns it as a PNG data URI.
        /// </summary>
        /// <returns>
        /// <c>data:image/png;base64,...</c>, or an empty string when no image was returned.
        /// </returns>
        public string GetLoginImg() {
            loginVerifyImgStream = string.Empty;
            string queryData = "t=1463034742570";
            // Note (from the original author): the "rdmdmd5" cookie is returned by this request.
            string imageBase64 = hhn.HttpGetImage(loginVerifyImgRequestUrl, queryData, HttpForType.联通);
            CollectJsonLog(loginVerifyImgRequestUrl, queryData, 0, imageBase64);

            if (!string.IsNullOrEmpty(imageBase64)) {
                loginVerifyImgStream = "data:image/png;base64," + imageBase64;
            }
            return loginVerifyImgStream;
        }

        /// <summary>
        /// Logs out. No request is sent; the method always reports success.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool LogOut() {
            return true;
        }

        /// <summary>
        /// Logs in with the phone number, service password, image code and uvc key.
        /// </summary>
        /// <param name="loginret">
        /// Receives a failure description (wrong password, too many wrong passwords,
        /// province system upgrading) or an empty string.
        /// </param>
        /// <returns><c>true</c> when the response contains <c>resultCode:"0000"</c>.</returns>
        /// <remarks>
        /// While the server answers "province system is upgrading" the request is retried,
        /// at most <see cref="MaxRetryCount"/> times. The original loop had no bound and
        /// could spin forever, which also made the "upgrading" message unreachable.
        /// NOTE(review): the service password is part of the URL that is written to the
        /// JSON log - consider masking it.
        /// </remarks>
        public bool Login(out string loginret) {
            loginret = string.Empty;

            // Service-password login. A variant without verifyCode/uvc and with live
            // timestamps was used by an earlier version of this method.
            loginRequestUrl = "https://uac.10010.com/portal/Service/MallLogin?callback=jQuery17208163765012834383_1463034583180&req_time=1463034838271&redirectURL=http://www.10010.com&userName=" + currentPhoneNumber + "&password=" + currentPhoneServicePwd + "&pwdType=01&productType=01&verifyCode=" + currentLoginImgCode + "&uvc=" + currentUVC + "&redirectType=01&rememberMe=1&_=1463034838271";

            string retString = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
            for (int attempt = 1; attempt < MaxRetryCount && retString.Contains(@"所属省份系统正在升级"); attempt++) {
                Thread.Sleep(RetryDelayMilliseconds);
                retString = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
            }

            CollectJsonLog(loginRequestUrl, "", 0, retString);

            if (retString.Contains(@"resultCode:""7007""")) {
                loginret = "账户名与密码不匹配";
            }
            if (retString.Contains(@"密码出错已达上限")) {
                loginret = "密码出错已达上限";
            }
            if (retString.Contains(@"所属省份系统正在升级")) {
                loginret = "所属省份系统正在升级";
            }

            return retString.Contains(@"resultCode:""0000""");
        }

        /// <summary>
        /// China Unicom specific: the checklogin request that is issued before querying bills.
        /// Evaluating this property sends a POST request.
        /// </summary>
        private bool checkLogin {
            get {
                string url = "http://iservice.10010.com/e3/static/check/checklogin?_="
                    + TimeStamp.GetTimeStamp_13();

                string retString = hhn.HttpPost(url, "", HttpForType.联通);
                CollectJsonLog(url, "", 1, retString);

                return retString.Contains(@"""isLogin"":true");
            }
        }

        /// <summary>
        /// China Unicom specific: requests the captcha page and adds the "WT_FPC" cookie
        /// to the HTTP helper's cookie store.
        /// </summary>
        public void UNCInitPage() {
            string url = "https://login.10010.com/captchazh.htm?type=05";
            string retS = hhn.HttpGet(url, "", HttpForType.联通);
            CollectJsonLog(url, "", 0, retS);

            // The cookie value is hard-coded; per the original author the server does not
            // validate it, so the JS-computed value that used to be fetched here (and then
            // discarded) is no longer requested.
            Cookie wtCookie = new Cookie {
                Expires = DateTime.Now.AddYears(10),
                Path = "/",
                Domain = ".10010.com",
                Name = "WT_FPC",
                Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376"
            };
            hhn.cookie.Add(wtCookie);
        }

        /// <summary>
        /// Sends the query SMS. Nothing is sent here; the method always reports success.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool SendQuerySms() {
            return true;
        }

        /// <summary>
        /// China Unicom needs no SMS verification for queries.
        /// </summary>
        /// <param name="smscode">Ignored.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool IsQuerySmsVerifyOk(string smscode) {
            return true;
        }

        /// <summary>
        /// Not implemented for China Unicom.
        /// </summary>
        /// <param name="temp">Ignored.</param>
        /// <returns>Always <c>null</c>.</returns>
        public IList<T> GetQueryData<T>(T temp) {
            return null;
        }

        /// <summary>
        /// Fetches the personal-information JSON after a successful checklogin.
        /// </summary>
        /// <returns>The raw response, or an empty string when checklogin fails.</returns>
        private string getMyDetails() {
            if (!checkLogin) {
                return "";
            }

            string infoUrl = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
            string retString = hhn.HttpPost(infoUrl, "", HttpForType.联通);
            CollectJsonLog(infoUrl, "", 1, retString);
            return retString;
        }

        /// <summary>
        /// Collects the customer profile: name/sex, account details, balances, points and
        /// phone attribution.
        /// </summary>
        /// <returns>The populated model; fields missing from a response keep their default/empty value.</returns>
        public TXInfoModel GetInfo() {
            TXInfoModel tim = new TXInfoModel();

            #region Part 1: name and sex
            string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
            string retString = hhn.HttpGet(infoUrl, "");
            CollectJsonLog(infoUrl, "", 0, retString);

            tim.CustomerName = Utilities.QuMiddle(retString, @"name"":""", @"""");
            // Anything other than "1" (including a missing field) is reported as "女".
            tim.CustomerSex = Utilities.QuMiddle(retString, @"sex"":""", @"""") == "1" ? "男" : "女";
            #endregion

            #region Part 2: account details
            string details = getMyDetails();
            tim.Email = Utilities.QuMiddle(details, @"sendemail"":""", @"""");

            DateTime inNetTime;
            DateTime.TryParse(Utilities.QuMiddle(details, @"opendate"":""", @""""), out inNetTime);
            tim.InNetTime = inNetTime;

            tim.Grade = ""; // star-level score, not provided
            tim.IDCard = Utilities.QuMiddle(details, @"certnum"":""", @"""");
            tim.PhoneNumber = Utilities.QuMiddle(details, @"usernumber"":""", @"""");
            tim.ProviderName = "中国联通:" + Utilities.QuMiddle(details, @"brand"":""", @"""") + "-" + Utilities.QuMiddle(details, @"productName"":""", @"""");
            tim.RegAddress = Utilities.QuMiddle(details, @"certaddr"":""", @"""");

            tim.ContactNum = Utilities.QuMiddle(details, @"usernumber"":""", @"""");
            tim.NetAge = "";
            tim.PhoneStatus = Utilities.QuMiddle(details, @"subscrbstat"":""", @"""");
            tim.RealNameInfo = Utilities.QuMiddle(details, @"certtype"":""", @"""");
            tim.StarLevel = Utilities.QuMiddle(details, @"custlvl"":""", @"""");
            tim.LevelInfo = "";
            tim.ZipCode = "";
            #endregion

            #region Part 3: phone-fee balance / account balance
            string balanceUrl = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
            string balanceData = "type=onlyAccount";
            string balanceRet = hhn.HttpPost(balanceUrl, balanceData, HttpForType.联通);
            CollectJsonLog(balanceUrl, balanceData, 1, balanceRet);

            // TryParse: a failed or changed balance response must not throw away the
            // profile data collected above; the fees then stay 0.
            decimal curFee;
            decimal.TryParse(Utilities.QuMiddle(balanceRet, @"userbalance"":""", @""""), out curFee);
            tim.CurFee = curFee;

            decimal curFeeTotal;
            decimal.TryParse(Utilities.QuMiddle(balanceRet, @"acctbalance"":""", @""""), out curFeeTotal);
            tim.CurFeeTotal = curFeeTotal;
            #endregion

            #region Part 4: points
            string headerUrl = "http://iservice.10010.com/e3/static/query/headerView";
            string headerRet = hhn.HttpPost(headerUrl, "", HttpForType.联通);
            CollectJsonLog(headerUrl, "", 1, headerRet);

            // Some card types return JSON without a points field; the score then stays 0.
            // NOTE(review): the key is spelled "sore" here - confirm it is not meant to be "score".
            int score;
            int.TryParse(Utilities.QuMiddle(headerRet, @"sore"":""", @""""), out score);
            tim.PointValue = score;
            #endregion

            #region Part 5: phone attribution lookup
            tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);
            #endregion

            return tim;
        }

        /// <summary>
        /// Gets the bill summaries of the last five months (current month included).
        /// </summary>
        /// <returns>One entry per month, newest first; empty when checklogin fails.</returns>
        public IList<TXZhangDanModel> GetZhangDan() {
            IList<TXZhangDanModel> listZD = new List<TXZhangDanModel>();
            string infoUrl = "http://iservice.10010.com/e3/static/query/queryHistoryBill?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100020001";

            string[] monthRanges;
            List<string> postDataList = GetZhangDanPostData(out monthRanges);
            if (!checkLogin) {
                return listZD;
            }

            for (int i = 0; i < postDataList.Count; i++) {
                string postData = postDataList[i];
                string retS = hhn.HttpPost(infoUrl, postData, HttpForType.联通);
                CollectJsonLog(infoUrl, postData, 1, retS);

                TXZhangDanModel bill;
                string billCycle = BillCycleRegex.Match(retS).ToString();
                if (!string.IsNullOrEmpty(billCycle)) {
                    // Closed bill: the response states its own cycle ("start至end") and fee.
                    DateTime cycleStart, cycleEnd;
                    decimal nowFee;
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, @"billcycle"":""", "至"), out cycleStart);
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, "至", @""""), out cycleEnd);
                    decimal.TryParse(Utilities.QuMiddle(retS, @"nowFee"":""", @""""), out nowFee);
                    bill = new TXZhangDanModel {
                        billStartDate = cycleStart,
                        billEndDate = cycleEnd,
                        billFee = nowFee
                    };
                } else {
                    // No cycle in the response: fall back to the locally computed month range.
                    decimal allFee;
                    decimal.TryParse(Utilities.QuMiddle(retS, @"allfee"":""", @""""), out allFee);
                    string[] range = monthRanges[i].Split('&');
                    bill = new TXZhangDanModel {
                        billStartDate = DateTime.Parse(range[0]),
                        billEndDate = DateTime.Parse(range[1]),
                        billFee = allFee
                    };
                }
                listZD.Add(bill);
            }

            return listZD;
        }

        /// <summary>
        /// Gets the call-detail records of the last five months (current month included).
        /// </summary>
        /// <returns>All parsed records; empty when checklogin fails.</returns>
        /// <exception cref="Exception">The server reports that the detail query was called too frequently.</exception>
        /// <exception cref="FormatException">A record's call date/time cannot be parsed.</exception>
        public IList<TXXiangDanModel> GetXiangDan() {
            IList<TXXiangDanModel> listXD = new List<TXXiangDanModel>();
            callListRequestUrl = "http://iservice.10010.com/e3/static/query/callDetail?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100030001";

            List<string> monthRanges = GetXiangDanPostData();
            if (!checkLogin) {
                return listXD;
            }

            foreach (string monthRange in monthRanges) {
                string[] range = monthRange.Split('&');
                string rangeQuery = "beginDate=" + range[0] + "&endDate=" + range[1];

                // Probe request: only used to read the month's total record count.
                string probeData = rangeQuery + "&pageNo=1&pageSize=20";
                string retS = hhn.HttpPost(callListRequestUrl, probeData, HttpForType.联通);
                CollectJsonLog(callListRequestUrl, probeData, 1, retS);

                // Bounded retry on the transient "service unavailable" answer. If it never
                // recovers, no totalRecord is found below and the month yields no pages.
                for (int attempt = 1; attempt < MaxRetryCount && retS.Contains("暂时无法为您提供服务"); attempt++) {
                    Thread.Sleep(RetryDelayMilliseconds);
                    retS = hhn.HttpPost(callListRequestUrl, probeData, HttpForType.联通);
                }
                if (retS.Contains("系统检测您的访问过于频繁")) {
                    throw new Exception("访问获取详单链接过于频繁!请明天再试");
                }

                string totalRecord = Utilities.QuMiddle(retS, @"totalRecord"":", @",""");
                foreach (string pageNo in GetAllcurCuror(totalRecord)) {
                    string pageData = rangeQuery + "&pageNo=" + pageNo + "&pageSize=" + CallDetailPageSize;
                    string pageRet = hhn.HttpPost(callListRequestUrl, pageData, HttpForType.联通);
                    CollectJsonLog(callListRequestUrl, pageData, 1, pageRet);

                    AppendCallDetails(pageRet, listXD);
                }
            }
            return listXD;
        }

        /// <summary>
        /// Parses one call-detail response page and appends its records to <paramref name="target"/>.
        /// A page whose fields do not all occur the same number of times is skipped entirely.
        /// </summary>
        private static void AppendCallDetails(string pageJson, IList<TXXiangDanModel> target) {
            MatchCollection dates = CallDateRegex.Matches(pageJson);
            MatchCollection times = CallTimeRegex.Matches(pageJson);
            MatchCollection places = HomeAreaRegex.Matches(pageJson);
            MatchCollection modes = CallTypeRegex.Matches(pageJson);
            MatchCollection otherNums = OtherNumRegex.Matches(pageJson);
            MatchCollection durations = CallLongHourRegex.Matches(pageJson);
            MatchCollection types = LandTypeRegex.Matches(pageJson);
            MatchCollection fees = TotalFeeRegex.Matches(pageJson);

            // Records are rebuilt by position, so every field list (calltime included)
            // must have the same length before any of them is indexed.
            int count = dates.Count;
            bool aligned = times.Count == count && places.Count == count && modes.Count == count
                && otherNums.Count == count && durations.Count == count
                && types.Count == count && fees.Count == count;
            if (!aligned) {
                return;
            }

            for (int i = 0; i < count; i++) {
                decimal fee;
                decimal.TryParse(Utilities.QuMiddle(fees[i].Value, @"totalfee"":""", @""""), out fee);

                target.Add(new TXXiangDanModel {
                    anotherNm = Utilities.QuMiddle(otherNums[i].Value, @"othernum"":""", @""""),
                    commFee = fee,
                    commMode = Utilities.QuMiddle(modes[i].Value, @"calltypeName"":""", @""""),
                    commPlac = Utilities.QuMiddle(places[i].Value, @"homeareaName"":""", @""""),
                    commTime = Utilities.QuMiddle(durations[i].Value, @"calllonghour"":""", @""""),
                    commType = Utilities.QuMiddle(types[i].Value, @"landtype"":""", @""""),
                    startTime = DateTime.Parse(Utilities.QuMiddle(dates[i].Value, @"calldate"":""", @"""")
                        + " " + Utilities.QuMiddle(times[i].Value, @"calltime"":""", @""""))
                });
            }
        }

        /// <summary>
        /// Builds the page numbers ("1", "2", ...) needed to page through a month's
        /// records at 50 records per page.
        /// </summary>
        /// <param name="totalNum">Total record count of the month, as text.</param>
        /// <returns>The page numbers; empty when the count is missing, unparseable or not positive.</returns>
        private List<string> GetAllcurCuror(string totalNum) {
            List<string> pages = new List<string>();
            int total;
            if (!int.TryParse(totalNum, out total) || total <= 0) {
                return pages;
            }

            // Ceiling division, e.g. 201 records -> 5 pages.
            int pageCount = (total + CallDetailPageSize - 1) / CallDetailPageSize;
            for (int page = 1; page <= pageCount; page++) {
                pages.Add(page.ToString());
            }
            return pages;
        }

        /// <summary>
        /// Builds the POST data for the bills of the last five months, newest first.
        /// </summary>
        /// <param name="startendS">
        /// Receives, per month, "start&amp;end" as "yyyy-MM-dd HH:mm:ss" timestamps: the
        /// first day of the month, and either now (current month) or the month's last day.
        /// </param>
        /// <returns>One "billdate=yyyyMM&amp;querycode=0001&amp;querytype=0001" entry per month.</returns>
        private List<string> GetZhangDanPostData(out string[] startendS) {
            const string rangeFormat = "yyyy-MM-dd HH:mm:ss";

            List<string> retlist = new List<string>();
            startendS = new string[MonthsToQuery];

            DateTime now = DateTime.Now;
            DateTime firstOfThisMonth = new DateTime(now.Year, now.Month, 1);
            for (int i = 0; i < MonthsToQuery; i++) {
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                // The current month is still running, so it ends "now"; earlier months
                // end on their last calendar day.
                DateTime monthEnd = i == 0 ? now : monthStart.AddMonths(1).AddDays(-1);

                startendS[i] = monthStart.ToString(rangeFormat) + "&" + monthEnd.ToString(rangeFormat);
                retlist.Add("billdate=" + monthStart.ToString("yyyyMM") + "&querycode=0001&querytype=0001");
            }
            return retlist;
        }

        /// <summary>
        /// Builds the date ranges for the call details of the last five months, newest
        /// first, formatted as "2016-04-01&amp;2016-04-30".
        /// </summary>
        /// <returns>
        /// One "start&amp;end" entry per month; the current month ends today, earlier
        /// months end on their last calendar day.
        /// </returns>
        private List<string> GetXiangDanPostData() {
            List<string> retlist = new List<string>();

            DateTime today = DateTime.Now;
            DateTime firstOfThisMonth = new DateTime(today.Year, today.Month, 1);
            for (int i = 0; i < MonthsToQuery; i++) {
                // AddMonths handles the roll-over into previous years and leap-year
                // month lengths, which manual month/year subtraction got wrong.
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                DateTime monthEnd = i == 0 ? today : monthStart.AddMonths(1).AddDays(-1);

                retlist.Add(monthStart.ToString("yyyy-MM-dd") + "&" + monthEnd.ToString("yyyy-MM-dd"));
            }
            return retlist;
        }

        /// <summary>
        /// Appends one request/response record to the shared JSON log.
        /// </summary>
        /// <param name="url">Requested URL.</param>
        /// <param name="data">Query string or POST body that was sent.</param>
        /// <param name="method1">1 for POST; any other value is logged as GET.</param>
        /// <param name="responseS">Raw response text.</param>
        public static void CollectJsonLog(string url, string data, int method1, string responseS) {
            string method = method1 == 1 ? "Post" : "Get";
            loglist.Add(string.Format("【请求url:{0} , 请求数据:{1} , 请求方式:{2}, 返回数据:{3} 】", url, data, method, responseS));
        }

        /// <summary>
        /// Returns the shared JSON log collected so far.
        /// </summary>
        /// <returns>The live log list (not a copy).</returns>
        public IList<string> GetAllJsonLog() {
            return loglist;
        }
    }
}