112 lines
4.5 KiB
Python
112 lines
4.5 KiB
Python
import random
|
|
import requests
|
|
import threading
|
|
import random
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from lxml import etree
|
|
from faker import Faker
|
|
from model import SexEnum, Person, Hotel, Railway, City
|
|
from base import db
|
|
from datetime import datetime, timedelta
|
|
from dateutil import parser, rrule
|
|
from deprecated import deprecated
|
|
|
|
# Module-wide thread pool, capped at ten worker threads.
executor = ThreadPoolExecutor(max_workers=10)

# Faker instance limited to the zh_CN locale; supplies names and phone numbers.
faker = Faker(["zh_CN"])

# Prefixes generate_train_id can put in front of the numeric part;
# the empty string produces a purely numeric id.
train_prefix = ['T', 'K', 'D', 'G', 'L', '']

# Fixed pool of sample Hotel records.
faker_hotel_infos = [
    Hotel(
        hotel_code='1150001',
        hotel_name='北京华侨大厦',
        locate_city_id='101011600',
    ),
    Hotel(
        hotel_code='3250069',
        hotel_name='南京绿地洲际酒店',
        locate_city_id='101190111',
    ),
    Hotel(
        hotel_code='3350039',
        hotel_name='义乌锦都酒店',
        locate_city_id='101210904',
    ),
    Hotel(
        hotel_code='3650011',
        hotel_name='九江信华建国酒店',
        locate_city_id='101240211',
    ),
    Hotel(
        hotel_code='3750005',
        hotel_name='青岛香格里拉大酒店',
        locate_city_id='101120203',
    ),
    Hotel(
        hotel_code='3250028',
        hotel_name='徐州开元名都大酒店',
        locate_city_id='101190811',
    ),
]
|
|
|
|
# Train ids that faker_railway draws from.
faker_train_ids = [
    'D7359',
    'T4108',
    'D776',
    'D3023',
    'K1461',
    'L1500',
    'G662',
]

# Weight applied to each of the first 17 digits of an identification number
# when computing its check character (one weight per digit position).
ident_valid_ratio = [
    7, 9, 10, 5, 8, 4, 2, 1, 6, 3,
    7, 9, 10, 5, 8, 4, 2,
]

# Check character for each possible value of (weighted digit sum % 11).
ident_valid_result = list('10X98765432')

# Six-digit area codes used as the leading part of generated identification
# numbers; faker_identifications matches them against City.ad_code.
ident_area = [
    '320700', '320722', '320707', '320723', '320724', '320706',
    '320703', '320500', '320581', '320582', '320583', '320506',
    '320505', '320509', '320585', '320507', '320508',
]
|
|
|
|
|
|
# Superseded because of the lookup site's limits on crawlers: the address is
# now resolved by querying the city table with the area code instead.
@deprecated
def faker_identification_old() -> Person:
    """Build a fake Person by scraping an online ID-number lookup page.

    A random ID number from Faker is submitted to shenfenzheng.bmcx.com and
    the identification, address, sex and age are read from the text cells of
    the result table. Name and phone number come from Faker.

    Returns:
        A new Person; nothing is written to the database here.

    Raises:
        requests.RequestException: if the request fails or times out.
        IndexError: if the page yields fewer text cells than expected.
    """
    identification = faker.ssn(max_age=70)
    # Without a timeout a stalled server would block the caller indefinitely.
    resp = requests.get(
        "https://shenfenzheng.bmcx.com/" + identification + "__shenfenzheng/",
        timeout=10,
    )
    html = etree.HTML(resp.text)
    # Text of the white-background cells, skipping the table's second row.
    infos = html.xpath('//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()')
    # First character of the sex cell; used both as the SexEnum lookup key
    # and to pick a matching first name.
    sex_char = infos[3][0]
    if sex_char == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()
    person = Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_char).value,
        name=faker.last_name() + first_name,
        # Drops the last two characters of the age cell
        # (presumably a unit suffix - verify against the page).
        age=infos[4][0:-2],
        phone=faker.phone_number()
    )
    return person
|
|
|
|
|
|
def faker_identification() -> Person:
    """Generate a fake Person with a checksum-consistent 18-character ID.

    The identification is assembled from a six-digit area code taken from
    ``ident_area``, the birth date as YYYYMMDD, a random three-digit number
    and a check character derived from the first 17 digits via
    ``ident_valid_ratio`` and ``ident_valid_result``.

    The age is drawn from 18..70 and the birth year is derived from the
    current date, so the stored age always agrees with the birth date
    embedded in the identification, whichever year the code runs in.

    Returns:
        A new Person with identification, sex, name, age and phone set.
        The address is not filled in here and nothing is saved.
    """
    today = datetime.today()
    area = random.choice(ident_area)

    age = random.randint(18, 70)
    month = random.randint(1, 12)
    # Day is capped at 28 so the date is valid in every month.
    day = random.randint(1, 28)
    # If this year's birthday is still ahead, the person must have been born
    # one year earlier to already have completed `age` years.
    birthday_passed = (month, day) <= (today.month, today.day)
    year = today.year - age - (0 if birthday_passed else 1)
    birthday = datetime(year=year, month=month, day=day).strftime("%Y%m%d")

    sequence = str(random.randint(100, 999))
    first_17 = area + birthday + sequence
    weighted_sum = sum(
        int(digit) * weight
        for digit, weight in zip(first_17, ident_valid_ratio)
    )
    identification = first_17 + ident_valid_result[weighted_sum % 11]

    # Parity of the 17th digit: odd -> 1, even -> 0.
    sex = int(identification[16]) % 2

    # NOTE(review): faker_identification_old compares SexEnum.MALE.sex with a
    # character scraped from the page, while here it is compared with an
    # int - confirm the type of SexEnum.MALE.sex so male names can be picked.
    if sex == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()

    return Person(
        identification=identification,
        sex=sex,
        name=faker.last_name() + first_name,
        age=age,
        phone=faker.phone_number()
    )
|
|
|
|
|
|
def faker_identifications(num: int) -> list[Person]:
    """Generate ``num`` fake persons, fill in their addresses and stage them.

    Each person's address is built from the City row whose ``ad_code``
    equals the first six characters of the person's identification.

    Args:
        num: How many persons to generate.

    Returns:
        The generated Person objects, with ``address`` populated.

    Raises:
        KeyError: if a person's area code has no matching City row.

    Note:
        The persons are passed to ``db.session.bulk_save_objects`` but the
        session is not committed here.
    """
    people = [faker_identification() for _ in range(num)]

    # One query for all area codes instead of one lookup per person.
    wanted_codes = [someone.identification[0:6] for someone in people]
    cities = City.query.filter(City.ad_code.in_(wanted_codes))

    address_by_code = {}
    for city in cities:
        address_by_code[city.ad_code] = '{} {} {} {}'.format(
            city.country_cn,
            city.province_cn,
            city.admin_district_cn,
            city.city_cn,
        )

    for someone in people:
        someone.address = address_by_code[someone.identification[0:6]]

    db.session.bulk_save_objects(people)
    return people
|
|
|
|
|
|
def faker_railways(num: int) -> list[Railway]:
    """Generate ``num`` fake persons with one railway record each and commit.

    Args:
        num: How many person/railway pairs to generate.

    Returns:
        The generated Railway objects, in the same order as the persons
        returned by ``faker_identifications``.
    """
    tickets = []
    for passenger in faker_identifications(num):
        tickets.append(faker_railway(passenger))

    db.session.bulk_save_objects(tickets)
    # Single commit covering the railway records saved above.
    db.session.commit()
    return tickets
|
|
|
|
|
|
def faker_railway(person: Person) -> Railway:
    """Build a fake Railway record for ``person``.

    Identification, phone and name are copied from the person. The train id
    is drawn from ``faker_train_ids`` and the launch time is the current
    time shifted by a whole number of days between -10 and +10.

    Args:
        person: The traveller the record is created for.

    Returns:
        A new Railway object; nothing is saved here.
    """
    train_index = int(random.random() * len(faker_train_ids))
    # int(random.random() * 21) is 0..20, so the offset spans -10..10 days.
    day_offset = int(random.random() * 21) - 10
    departure = datetime.today() + timedelta(days=day_offset)
    return Railway(
        identification=person.identification,
        phone=person.phone,
        name=person.name,
        train=faker_train_ids[train_index],
        launch=departure,
    )
|
|
|
|
|
|
def generate_train_id():
    """Return a random train id string.

    The id is a prefix drawn from ``train_prefix`` (possibly empty) followed
    by the decimal form of a random integer in 0..9999, without zero padding.
    """
    prefix_index = int(random.random() * len(train_prefix))
    number = int(random.random() * 10000)
    return train_prefix[prefix_index] + str(number)
|
|
|
|
# def faker_hotel(num: int):
|