Files
nCovTrack-Faker/service/faker_data.py
2022-01-27 18:00:03 +08:00

99 lines
3.6 KiB
Python

import random
import requests
import threading
import random
from concurrent.futures import ThreadPoolExecutor, as_completed
from lxml import etree
from faker import Faker
from model import SexEnum, Person, Hotel, Railway, City
from base import db
from datetime import datetime, timedelta
from dateutil import parser, rrule
from deprecated import deprecated
# Shared worker pool used to build fake persons concurrently.
executor = ThreadPoolExecutor(max_workers=10)

# Faker instance restricted to the zh_CN locale.
faker = Faker(["zh_CN"])

# Candidate prefixes for generated train numbers; the empty string yields a
# purely numeric train id.
train_prefix = ['T', 'K', 'D', 'G', 'L', '']

# (hotel_code, hotel_name, locate_city_id) rows for the canned hotel fixtures.
_HOTEL_ROWS = (
    ('1150001', '北京华侨大厦', '101011600'),
    ('3250069', '南京绿地洲际酒店', '101190111'),
    ('3350039', '义乌锦都酒店', '101210904'),
    ('3650011', '九江信华建国酒店', '101240211'),
    ('3750005', '青岛香格里拉大酒店', '101120203'),
    ('3250028', '徐州开元名都大酒店', '101190811'),
)
faker_hotel_infos = [
    Hotel(hotel_code=code, hotel_name=name, locate_city_id=city_id)
    for code, name, city_id in _HOTEL_ROWS
]

# Fixed pool of train numbers that faker_railway picks from.
faker_train_ids = ['D7359', 'T4108', 'D776', 'D3023', 'K1461', 'L1500', 'G662']
# The scraping-based lookup below is throttled by the target site, so
# faker_identification resolves the city from its area code instead.
@deprecated
def faker_identification_old() -> Person:
    """Build a fake person by scraping an online ID-number lookup page.

    Deprecated: every call performs an HTTP request against a third-party
    site. ``faker_identification`` derives the same fields locally and from
    the ``City`` table instead.

    Returns:
        A ``Person`` whose identification, address, sex and age come from the
        scraped table cells, with a generated name and phone number.

    Raises:
        requests.exceptions.Timeout: if the site does not respond in time.
        IndexError: if the page does not contain the expected table cells.
    """
    identification = faker.ssn(max_age=70)

    # Always bound the request: without a timeout `requests` can wait forever
    # on a stalled connection and block the calling worker thread.
    resp = requests.get(
        "https://shenfenzheng.bmcx.com/" + identification + "__shenfenzheng/",
        timeout=10,
    )
    html = etree.HTML(resp.text)

    # Text of the white-background cells, skipping the table's second row.
    infos = html.xpath('//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()')

    # Only the first character of the sex cell is used.
    sex_char = infos[3][0]
    if sex_char == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()

    return Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_char).value,
        name=faker.last_name() + first_name,
        # Drop the last two characters of the age cell (presumably a unit
        # suffix - confirm against the page); the value stays a string.
        age=infos[4][0:-2],
        phone=faker.phone_number()
    )
def faker_identification() -> Person:
    """Build a fake person from a generated resident ID number.

    Birthday, age and sex are derived from the ID number itself; the address
    is looked up in the ``City`` table by the ID's first six characters.

    Returns:
        A new, not yet persisted ``Person``.

    Raises:
        Whatever ``Query.one()`` raises when the area code does not match
        exactly one ``City`` row.
    """
    identification = faker.ssn(max_age=70)

    # Characters 7-14 of the ID are parsed as the birth date.
    birthday = parser.parse(identification[6:14])

    # Age in completed years. The previous code stored the rrule recurrence
    # object itself instead of a number; plain date arithmetic gives the int.
    today = datetime.today()
    birthday_pending = (today.month, today.day) < (birthday.month, birthday.day)
    age = today.year - birthday.year - (1 if birthday_pending else 0)

    # 17th character of the ID: even -> 0, odd -> 1.
    sex = 0 if int(identification[16:17]) % 2 == 0 else 1

    # TODO: this lookup runs once per generated person; it would be better
    # hoisted out of the per-person loop (e.g. cached by area code).
    city = db.session.query(City).filter(City.ad_code == identification[0:6]).one()
    address = f'{city.country_cn} {city.province_cn} {city.admin_district_cn} {city.city_cn}'

    # NOTE(review): `sex` is an int here, while faker_identification_old
    # compares a character of scraped text with SexEnum.MALE.sex. Confirm the
    # two are comparable, otherwise the male branch below never triggers.
    if sex == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()

    return Person(
        identification=identification,
        address=address,
        sex=sex,
        name=faker.last_name() + first_name,
        age=age,
        phone=faker.phone_number()
    )
def faker_identifications(num: int) -> list[Person]:
    """Generate ``num`` fake persons on the shared pool and persist them.

    Each person is produced by ``faker_identification`` in a worker thread.
    Results are gathered in completion order, so the returned list is not in
    submission order. All persons are added to the session and committed
    before returning.
    """
    jobs = [executor.submit(faker_identification) for _ in range(num)]

    persons = []
    for finished in as_completed(jobs):
        # .result() re-raises any exception raised inside the worker.
        persons.append(finished.result())

    db.session.add_all(persons)
    db.session.commit()
    return persons
def faker_railways(num: int) -> list[Railway]:
    """Create and persist ``num`` fake railway records.

    ``faker_identifications`` first generates and commits ``num`` persons;
    one ``Railway`` row is then built per person and committed separately.
    """
    railways = list(map(faker_railway, faker_identifications(num)))

    session = db.session
    session.add_all(railways)
    session.commit()
    return railways
def faker_railway(person: Person) -> Railway:
    """Build a fake railway trip for ``person``.

    Identification, phone and name are copied from the person. The train is
    picked uniformly from ``faker_train_ids`` and the launch time is the
    current local time shifted by a whole number of days in [-10, 10].

    Returns:
        A new, not yet persisted ``Railway``.
    """
    # randint is inclusive on both ends: same range as the former
    # int(random.random() * 21) - 10.
    day_offset = random.randint(-10, 10)
    return Railway(
        identification=person.identification,
        phone=person.phone,
        name=person.name,
        train=random.choice(faker_train_ids),
        launch=datetime.today() + timedelta(days=day_offset)
    )
def generate_train_id() -> str:
    """Return a random train id: a prefix from ``train_prefix`` plus a number.

    The number is drawn uniformly from 0..9999 and is not zero-padded; with
    the empty prefix the result is purely numeric.
    """
    prefix = random.choice(train_prefix)
    # randrange(10000) covers 0..9999, like the former int(random() * 10000).
    number = random.randrange(10000)
    return prefix + str(number)
# def faker_hotel(num: int):