"""Fake-data generators and contact queries for persons, hotel stays and railway trips.

The ``faker_*`` functions build ``Person`` / ``Hotel`` / ``Railway`` rows and
persist them through ``db.session``; the ``query_*`` functions read them back.
"""
import copy
import random
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import requests
from dateutil import parser, rrule
from deprecated import deprecated
from faker import Faker
from lxml import etree

import utils.model
from model import SexEnum, Person, Hotel, Railway, City
from base import db

executor = ThreadPoolExecutor(max_workers=10)

faker = Faker(["zh_CN"])

train_prefix = ['T', 'K', 'D', 'G', 'L', '']

# Template hotels.  These objects are shared module state: copy their fields
# into a new ``Hotel`` instead of mutating them (see ``faker_hotel``).
faker_hotel_infos = [
    Hotel(hotel_code='1150001', hotel_name='北京华侨大厦', locate_city_id='101011600'),
    Hotel(hotel_code='3250069', hotel_name='南京绿地洲际酒店', locate_city_id='101190111'),
    Hotel(hotel_code='3350039', hotel_name='义乌锦都酒店', locate_city_id='101210904'),
    Hotel(hotel_code='3650011', hotel_name='九江信华建国酒店', locate_city_id='101240211'),
    Hotel(hotel_code='3750005', hotel_name='青岛香格里拉大酒店', locate_city_id='101120203'),
    Hotel(hotel_code='3250028', hotel_name='徐州开元名都大酒店', locate_city_id='101190811'),
]

faker_train_ids = ['D7359', 'T4108', 'D776', 'D3023', 'K1461', 'L1500', 'G662']

# Per-position weights applied to the first 17 digits of an ID number.
ident_valid_ratio = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
# Check character indexed by (weighted sum % 11).
ident_valid_result = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
# Six-digit area codes used as the ID-number prefix.
ident_area = ['320700', '320722', '320707', '320723', '320724', '320706', '320703', '320500', '320581', '320582',
              '320583', '320506', '320505', '320509', '320585', '320507', '320508']


# Scraping the lookup site is rate limited, so the address is now resolved
# from the city table by area code instead (see ``faker_identifications``).
@deprecated
def faker_identification_old() -> Person:
    """Build a fake person by scraping an online ID-number lookup page.

    Deprecated in favour of ``faker_identification``.

    Returns:
        A ``Person`` populated from the scraped table cells.
    """
    identification = faker.ssn(max_age=70)
    # A timeout keeps a stalled remote site from blocking the caller forever.
    resp = requests.get("https://shenfenzheng.bmcx.com/" + identification + "__shenfenzheng/", timeout=10)
    html = etree.HTML(resp.text)
    infos = html.xpath('//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()')
    sex_text = infos[3][0]
    if sex_text == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()
    return Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_text).value,
        name=faker.last_name() + first_name,
        age=infos[4][0:-2],
        phone=faker.phone_number()
    )


def _ident_check_digit(body: str) -> str:
    """Return the check character for the 17-digit ID-number prefix *body*."""
    weighted = sum(int(digit) * weight for digit, weight in zip(body, ident_valid_ratio))
    return ident_valid_result[weighted % 11]


def _full_years_between(birthdate: datetime, today: datetime) -> int:
    """Return the number of completed years from *birthdate* to *today*."""
    birthday_pending = (today.month, today.day) < (birthdate.month, birthdate.day)
    return today.year - birthdate.year - birthday_pending


def faker_identification() -> Person:
    """Build one fake person with a locally generated 18-character ID number.

    The ID is area code (6) + birth date ``YYYYMMDD`` (8) + a random three-digit
    sequence + a check character.  ``address`` is left unset here.

    Returns:
        A new, unsaved ``Person``.
    """
    today = datetime.today()
    area = random.choice(ident_area)
    # Birth year is relative to the current year (it used to be pinned to
    # 2022); the day is capped at 28 so every month is valid.
    birthdate = datetime(
        year=today.year - random.randint(18, 70),
        month=random.randint(1, 12),
        day=random.randint(1, 28)
    )
    body = area + birthdate.strftime("%Y%m%d") + str(random.randint(100, 999))
    identification = body + _ident_check_digit(body)

    # Completed years only.  Counting yearly rrule occurrences included the
    # birth date itself and so reported an age one year too high.
    age = _full_years_between(birthdate, today)

    # Parity of the 17th digit: 0 when even, 1 when odd.
    sex = int(identification[16]) % 2
    # NOTE(review): this compares the 0/1 parity flag with SexEnum.MALE.sex;
    # confirm that attribute is an int with the matching encoding.
    if sex == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()

    return Person(
        identification=identification,
        sex=sex,
        name=faker.last_name() + first_name,
        age=age,
        phone=faker.phone_number()
    )


def faker_identifications(num: int) -> list[Person]:
    """Generate *num* fake persons, fill in their addresses and bulk-save them.

    Addresses are looked up in the ``City`` table by the six-digit area code
    that prefixes each ID number.  The session is NOT committed here.

    Args:
        num: Number of persons to generate.

    Returns:
        The generated persons (an empty list when *num* is not positive).

    Raises:
        KeyError: If an area code has no matching ``City`` row.
    """
    persons = [faker_identification() for _ in range(num)]
    if not persons:
        # Nothing to look up or save; avoids issuing an empty IN () query.
        return persons

    area_codes = [person.identification[0:6] for person in persons]
    rows = City.query.filter(City.ad_code.in_(area_codes))
    code_area_dict = {
        row.ad_code: f'{row.country_cn} {row.province_cn} {row.admin_district_cn} {row.city_cn}'
        for row in rows
    }
    for person in persons:
        person.address = code_area_dict[person.identification[0:6]]
    db.session.bulk_save_objects(persons)
    return persons


def faker_railways(num: int) -> list[Railway]:
    """Generate *num* persons with one railway trip each, save and commit.

    Args:
        num: Number of person/trip pairs to generate.

    Returns:
        The generated ``Railway`` rows.
    """
    persons = faker_identifications(num)
    railways = [faker_railway(person) for person in persons]
    db.session.bulk_save_objects(railways)
    db.session.commit()
    return railways


def faker_railway(person: Person) -> Railway:
    """Build an unsaved railway trip for *person*.

    The train is drawn from ``faker_train_ids`` and the launch time is now
    shifted by a whole number of days in ``[-10, 10]``.
    """
    return Railway(
        identification=person.identification,
        phone=person.phone,
        name=person.name,
        train=random.choice(faker_train_ids),
        launch=datetime.today() + timedelta(days=random.randint(-10, 10))
    )


def generate_train_id():
    """Return a random train id: a prefix from ``train_prefix`` plus a number in ``[0, 9999]``."""
    return random.choice(train_prefix) + str(random.randrange(10000))


def faker_hotels(num: int) -> list[Hotel]:
    """Generate *num* persons with one hotel stay each, save and commit.

    Args:
        num: Number of person/stay pairs to generate.

    Returns:
        The generated ``Hotel`` rows.
    """
    persons = faker_identifications(num)
    hotels = [faker_hotel(person) for person in persons]
    db.session.bulk_save_objects(hotels)
    db.session.commit()
    return hotels


def faker_hotel(person: Person) -> Hotel:
    """Build an unsaved three-day hotel stay for *person*.

    A template is drawn from ``faker_hotel_infos`` and its fields are copied
    into a NEW ``Hotel``.  Returning the template itself (as before) made
    every call that drew the same template share one object, so later calls
    overwrote the identification and dates of earlier results.
    """
    template = random.choice(faker_hotel_infos)
    hotel = Hotel(
        hotel_code=template.hotel_code,
        hotel_name=template.hotel_name,
        locate_city_id=template.locate_city_id
    )
    hotel.identification = person.identification
    hotel.in_data = datetime.today() + timedelta(days=random.randint(-10, 10))
    hotel.out_data = hotel.in_data + timedelta(days=3)
    return hotel


def faker_contacts(num_hotel: int, num_railway: int):
    """Generate one patient plus contacts that share the patient's stay/trip.

    ``num_hotel`` persons receive a copy of the patient's hotel stay and
    ``num_railway`` other persons receive a copy of the patient's railway
    trip.  Everything is saved and committed.

    Args:
        num_hotel: Number of hotel contacts to generate.
        num_railway: Number of railway contacts to generate.

    Returns:
        A dict with ``"patient"`` and ``"contacts"`` entries, each holding
        ``"hotel"`` and ``"railway"`` values serialised via ``utils.model``.
    """
    persons = faker_identifications(num_railway + num_hotel + 1)
    patient = persons[0]
    contacts_hotel = persons[1:1 + num_hotel]
    # Slice from the front: ``persons[-num_railway:]`` degenerates to the
    # whole list (patient and hotel contacts included) when num_railway == 0.
    contacts_railway = persons[1 + num_hotel:]
    hotel_patient = faker_hotel(patient)
    railway_patient = faker_railway(patient)

    def assemble_hotel(contact: Person) -> Hotel:
        hotel_contact = copy.deepcopy(hotel_patient)
        hotel_contact.identification = contact.identification
        return hotel_contact

    def assemble_railway(contact: Person) -> Railway:
        railway_contact = copy.deepcopy(railway_patient)
        railway_contact.identification = contact.identification
        railway_contact.name = contact.name
        # The copy would otherwise keep the patient's phone number.
        railway_contact.phone = contact.phone
        return railway_contact

    hotel_contacts = [assemble_hotel(contact) for contact in contacts_hotel]
    railway_contacts = [assemble_railway(contact) for contact in contacts_railway]

    hotels = [hotel_patient] + hotel_contacts
    railways = [railway_patient] + railway_contacts
    db.session.bulk_save_objects(hotels)
    db.session.bulk_save_objects(railways)
    db.session.commit()
    return {
        "patient": {
            "hotel": utils.model.model2dict(hotel_patient),
            "railway": utils.model.model2dict(railway_patient)
        },
        "contacts": {
            "hotel": utils.model.models2dicts(hotel_contacts),
            "railway": utils.model.models2dicts(railway_contacts)
        }
    }


def query_patients() -> list[dict]:
    """Return, as dicts, the persons that have both a hotel row and a railway row."""
    patients = db.session.query(Person) \
        .join(Hotel, Person.identification == Hotel.identification) \
        .join(Railway, Person.identification == Railway.identification) \
        .all()
    return [patient.__to_dict__() for patient in patients]


def query_contacts_hotel(identification: str) -> list[dict]:
    """Return hotel contacts of the person with the given ID number.

    Selects every other person's hotel row whose ``out_data`` is later than
    the ``in_data`` of the given person's hotel row(s), merged with the
    matching ``Person`` fields.

    NOTE(review): the filter compares dates only; it does not restrict to the
    same hotel or bound the contact's check-in — confirm this is intended.
    """
    patient_subquery = Hotel.query.filter(Hotel.identification == identification).subquery()
    contacts = db.session.query(Hotel, Person) \
        .join(Hotel, Hotel.identification == Person.identification) \
        .filter(Person.identification != identification) \
        .filter(Hotel.out_data > patient_subquery.c.in_data)
    return [merge_object2dict(hotel, person) for hotel, person in contacts]


def query_contacts_railway(identification: str) -> list[dict]:
    """Return railway contacts of the person with the given ID number.

    Selects every other person's railway row whose ``launch`` equals the
    ``launch`` of the given person's railway row(s), merged with the matching
    ``Person`` fields.

    NOTE(review): the filter matches on ``launch`` only, not on ``train`` —
    confirm this is intended.
    """
    patient_subquery = Railway.query.filter(Railway.identification == identification).subquery()
    contacts = db.session.query(Railway, Person) \
        .join(Railway, Railway.identification == Person.identification) \
        .filter(Person.identification != identification) \
        .filter(Railway.launch == patient_subquery.c.launch)
    return [merge_object2dict(railway, person) for railway, person in contacts]


def query_contacts_classify(identification: str) -> dict[str, list[dict]]:
    """Return the contacts of a person grouped under ``'hotel'`` and ``'railway'``."""
    contacts = {'hotel': query_contacts_hotel(identification), 'railway': query_contacts_railway(identification)}
    return contacts


def query_contacts(identification: str) -> list[dict]:
    """Return hotel contacts followed by railway contacts as one flat list."""
    contacts = [*query_contacts_hotel(identification), *query_contacts_railway(identification)]
    return contacts


def merge_object2dict(*objs: db.Model) -> dict:
    """Merge the ``__to_dict__()`` output of each object; later objects win on key clashes."""
    res = {}
    for obj in objs:
        res.update(obj.__to_dict__())
    return res