"""Generators for fake persons and railway records.

Records are built with Faker (``zh_CN`` locale) and persisted through
``db.session``.
"""
import random
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import requests
from dateutil import parser, rrule
from deprecated import deprecated
from faker import Faker
from lxml import etree

from model import SexEnum, Person, Hotel, Railway, City
from base import db

# Shared pool used to build fake persons concurrently.
executor = ThreadPoolExecutor(max_workers=10)
faker = Faker(["zh_CN"])

# Candidate prefixes for generated train ids; '' yields a purely numeric id.
train_prefix = ['T', 'K', 'D', 'G', 'L', '']

faker_hotel_infos = [
    Hotel(hotel_code='1150001', hotel_name='北京华侨大厦', locate_city_id='101011600'),
    Hotel(hotel_code='3250069', hotel_name='南京绿地洲际酒店', locate_city_id='101190111'),
    Hotel(hotel_code='3350039', hotel_name='义乌锦都酒店', locate_city_id='101210904'),
    Hotel(hotel_code='3650011', hotel_name='九江信华建国酒店', locate_city_id='101240211'),
    Hotel(hotel_code='3750005', hotel_name='青岛香格里拉大酒店', locate_city_id='101120203'),
    Hotel(hotel_code='3250028', hotel_name='徐州开元名都大酒店', locate_city_id='101190811'),
]

faker_train_ids = ['D7359', 'T4108', 'D776', 'D3023', 'K1461', 'L1500', 'G662']


# Superseded by faker_identification(): the remote lookup site rate-limits
# crawlers, so the city is now resolved locally from the city code instead.
@deprecated
def faker_identification_old() -> Person:
    """Build a fake Person by scraping an online ID-number lookup page.

    Returns:
        A Person whose identification, address, sex and age are taken from
        the cells of the scraped result table.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        IndexError: if the page does not contain the expected table cells.
    """
    identification = faker.ssn(max_age=70)
    # A timeout keeps a stalled remote server from blocking the caller forever.
    resp = requests.get(
        "https://shenfenzheng.bmcx.com/" + identification + "__shenfenzheng/",
        timeout=10,
    )
    html = etree.HTML(resp.text)
    infos = html.xpath('//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()')
    # The first character of the sex cell selects both the enum member and
    # the gendered first-name generator.
    sex_char = infos[3][0]
    if sex_char == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()
    return Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_char).value,
        name=faker.last_name() + first_name,
        age=infos[4][0:-2],  # drops the last two characters of the age cell
        phone=faker.phone_number(),
    )


def _full_years_between(start: datetime, end: datetime) -> int:
    """Return the number of whole years elapsed from *start* to *end*."""
    anniversary_reached = (end.month, end.day) >= (start.month, start.day)
    return end.year - start.year - (0 if anniversary_reached else 1)


def faker_identification() -> Person:
    """Build a fake Person from a generated ID number and the local City table.

    Characters 0-5 of the ID are looked up as ``City.ad_code``, characters
    6-13 are parsed as the birth date, and the parity of character 16
    decides the sex (even -> 0, odd -> 1).

    Returns:
        A new Person that has not been added to the session.

    Raises:
        sqlalchemy NoResultFound / MultipleResultsFound: raised by ``.one()``
            when the city code does not match exactly one City row.
    """
    identification = faker.ssn(max_age=70)
    birthday = parser.parse(identification[6:14])
    # The previous code stored the rrule object itself as the age; compute
    # the elapsed whole years instead.
    age = _full_years_between(birthday, datetime.today())
    sex = 0 if int(identification[16:17]) % 2 == 0 else 1
    # TODO: this query runs once per person; batching it outside the
    # per-person call would be better.
    city = db.session.query(City).filter(City.ad_code == identification[0:6]).one()
    address = '{} {} {} {}'.format(
        city.country_cn, city.province_cn, city.admin_district_cn, city.city_cn
    )
    # NOTE(review): `sex` is an int here, while faker_identification_old()
    # compares SexEnum.MALE.sex against a scraped string -- confirm that this
    # comparison can ever be true.
    if sex == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()
    return Person(
        identification=identification,
        address=address,
        sex=sex,
        name=faker.last_name() + first_name,
        age=age,
        phone=faker.phone_number(),
    )


def faker_identifications(num: int) -> list[Person]:
    """Create *num* fake persons concurrently and commit them.

    Args:
        num: number of persons to generate.

    Returns:
        The persisted persons, in task-completion order (not submission order).
    """
    # NOTE(review): faker_identification() touches db.session from worker
    # threads -- confirm the session is safe to use that way.
    tasks = [executor.submit(faker_identification) for _ in range(num)]
    persons = [task.result() for task in as_completed(tasks)]
    db.session.add_all(persons)
    db.session.commit()
    return persons


def faker_railways(num: int) -> list[Railway]:
    """Create *num* fake persons plus one railway record each and commit them.

    Args:
        num: number of person/railway pairs to generate.

    Returns:
        The persisted railway records.
    """
    persons = faker_identifications(num)
    railways = [faker_railway(person) for person in persons]
    db.session.add_all(railways)
    db.session.commit()
    return railways


def faker_railway(person: Person) -> Railway:
    """Build a railway record for *person* with a random train and date.

    The train is drawn from ``faker_train_ids`` and the launch time is the
    current time shifted by a whole number of days in [-10, 10].
    """
    return Railway(
        identification=person.identification,
        phone=person.phone,
        name=person.name,
        train=random.choice(faker_train_ids),
        launch=datetime.today() + timedelta(days=random.randint(-10, 10)),
    )


def generate_train_id():
    """Return a random train id: a prefix from ``train_prefix`` plus 0-9999."""
    return random.choice(train_prefix) + str(random.randrange(10000))

# def faker_hotel(num: int):