Files
nCovTrack-Faker/service/faker_data.py
2022-02-07 16:28:14 +08:00

212 lines
8.2 KiB
Python

import copy
import random
import requests
import threading
import random
from concurrent.futures import ThreadPoolExecutor, as_completed
from lxml import etree
from faker import Faker
import utils.model
from model import SexEnum, Person, Hotel, Railway, City
from base import db
from datetime import datetime, timedelta
from dateutil import parser, rrule
from deprecated import deprecated
# Module-level thread pool (10 workers); no function visible in this section submits work to it.
executor = ThreadPoolExecutor(max_workers=10)
# Faker instance restricted to the zh_CN locale; supplies names, phone numbers and (legacy path) ID numbers.
faker = Faker(["zh_CN"])
# Prefixes that generate_train_id picks from; the empty string yields a purely numeric train id.
train_prefix = ['T', 'K', 'D', 'G', 'L', '']
# Template hotel records that faker_hotel chooses from at random.
faker_hotel_infos = [
Hotel(hotel_code='1150001', hotel_name='北京华侨大厦', locate_city_id='101011600'),
Hotel(hotel_code='3250069', hotel_name='南京绿地洲际酒店', locate_city_id='101190111'),
Hotel(hotel_code='3350039', hotel_name='义乌锦都酒店', locate_city_id='101210904'),
Hotel(hotel_code='3650011', hotel_name='九江信华建国酒店', locate_city_id='101240211'),
Hotel(hotel_code='3750005', hotel_name='青岛香格里拉大酒店', locate_city_id='101120203'),
Hotel(hotel_code='3250028', hotel_name='徐州开元名都大酒店', locate_city_id='101190811'),
]
# Fixed pool of train numbers that faker_railway chooses from at random.
faker_train_ids = ['D7359', 'T4108', 'D776', 'D3023', 'K1461', 'L1500', 'G662']
# Per-position weights applied to the first 17 digits of an ID number (see faker_identification).
ident_valid_ratio = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
# Check character looked up by (weighted digit sum mod 11) (see faker_identification).
ident_valid_result = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
# Six-digit area codes used as the leading part of generated ID numbers; faker_identifications
# looks the same codes up in City.ad_code to build the address.
ident_area = ['320700', '320722', '320707', '320723', '320724', '320706', '320703', '320500', '320581', '320582',
'320583', '320506', '320505', '320509', '320585', '320507', '320508']
# Superseded by faker_identification: the lookup site limits crawler traffic,
# so the city is now resolved locally from the area code instead.
@deprecated
def faker_identification_old() -> Person:
    """Build a fake Person by looking a Faker-generated ID number up online.

    The ID number comes from ``faker.ssn`` and is resolved through
    shenfenzheng.bmcx.com; the result page is scraped for the ID number,
    address, sex and age cells.

    Returns:
        A new, unsaved ``Person``.

    Raises:
        requests.exceptions.RequestException: if the request fails or times out.
        IndexError: if the page does not contain the expected table cells.
    """
    identification = faker.ssn(max_age=70)
    # Bound the wait: without a timeout requests may block indefinitely on a
    # stalled connection.
    resp = requests.get(
        "https://shenfenzheng.bmcx.com/" + identification + "__shenfenzheng/",
        timeout=10,
    )
    html = etree.HTML(resp.text)
    # Text of the white-background cells of the result table, skipping row 2.
    infos = html.xpath('//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()')
    # The first character of the sex cell is what SexEnum is keyed on.
    sex_char = infos[3][0]
    person = Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_char).value,
        name=faker.last_name() + (
            faker.first_name_male() if sex_char == SexEnum.MALE.sex else faker.first_name_female()),
        # Drop the two-character suffix that follows the number in the age cell.
        age=infos[4][0:-2],
        phone=faker.phone_number()
    )
    return person
def faker_identification() -> Person:
    """Generate a fake Person with a locally built 18-character ID number.

    The ID number is assembled as area code (6) + birth date ``YYYYMMDD`` (8)
    + random sequence (3) + check character (1). The check character is
    ``ident_valid_result[weighted_sum % 11]`` where the weights come from
    ``ident_valid_ratio``.

    The birth year is 18 to 70 years before the current year, so the
    resulting age is between 17 and 70 depending on whether the birthday has
    already passed this year.

    Returns:
        A new, unsaved ``Person`` without an address (see
        ``faker_identifications`` for address resolution).
    """
    today = datetime.today()
    area = random.choice(ident_area)
    birthdate = datetime(
        # Relative to the current year rather than a hard-coded 2022, so the
        # generated ages do not drift as time passes.
        year=today.year - random.randint(18, 70),
        month=random.randint(1, 12),
        # Capping at 28 keeps the day valid for every month.
        day=random.randint(1, 28),
    )
    sequence = str(random.randint(100, 999))
    first17 = area + birthdate.strftime("%Y%m%d") + sequence

    weighted_sum = sum(int(digit) * weight for digit, weight in zip(first17, ident_valid_ratio))
    identification = first17 + ident_valid_result[weighted_sum % 11]

    # Completed years only. Counting yearly rrule occurrences from the birth
    # date includes the birth date itself and therefore over-reports by one.
    birthday_passed = (today.month, today.day) >= (birthdate.month, birthdate.day)
    age = today.year - birthdate.year - (0 if birthday_passed else 1)

    # Parity of the 17th digit encodes the sex: even -> 0, odd -> 1.
    sex = int(identification[16]) % 2
    given_name = faker.first_name_male() if sex == SexEnum.MALE.sex else faker.first_name_female()
    return Person(
        identification=identification,
        sex=sex,
        name=faker.last_name() + given_name,
        age=age,
        phone=faker.phone_number()
    )
def faker_identifications(num: int) -> list[Person]:
    """Generate ``num`` fake persons, attach addresses and stage them for insert.

    Addresses are built from the ``City`` rows whose ``ad_code`` equals the
    first six characters of each ID number. The persons are handed to
    ``db.session.bulk_save_objects``; this function does not commit.

    Raises:
        KeyError: if a generated area code has no matching ``City`` row.
    """
    generated = []
    for _ in range(num):
        generated.append(faker_identification())

    prefixes = [entry.identification[0:6] for entry in generated]

    # Map each area code to a "country province district city" label.
    address_by_code = {}
    for city in City.query.filter(City.ad_code.in_(prefixes)):
        address_by_code[city.ad_code] = '{} {} {} {}'.format(
            city.country_cn, city.province_cn, city.admin_district_cn, city.city_cn)

    for entry in generated:
        entry.address = address_by_code[entry.identification[0:6]]

    db.session.bulk_save_objects(generated)
    return generated
def faker_railways(num: int) -> list[Railway]:
    """Create ``num`` fake persons with one railway record each and commit them.

    Returns:
        The generated ``Railway`` records, one per person.
    """
    travellers = faker_identifications(num)
    railways = list(map(faker_railway, travellers))
    db.session.bulk_save_objects(railways)
    # This commit also flushes the persons staged by faker_identifications.
    db.session.commit()
    return railways
def faker_railway(person: Person) -> Railway:
    """Build an unsaved railway record for ``person``.

    The train number is drawn from ``faker_train_ids`` and the departure time
    is the current time shifted by a whole number of days between -10 and +10.
    """
    train_index = int(random.random() * len(faker_train_ids))
    train = faker_train_ids[train_index]
    now = datetime.today()
    # int(random() * 21) is 0..20, so the offset spans -10..10 days.
    day_offset = int(random.random() * 21) - 10
    departure = now + timedelta(days=day_offset)
    return Railway(
        identification=person.identification,
        phone=person.phone,
        name=person.name,
        train=train,
        launch=departure,
    )
def generate_train_id():
    """Return a random train id: a prefix from ``train_prefix`` followed by an integer in 0..9999."""
    prefix_index = int(random.random() * len(train_prefix))
    number = int(random.random() * 10000)
    return f"{train_prefix[prefix_index]}{number}"
def faker_hotels(num: int) -> list[Hotel]:
    """Create ``num`` fake persons with one hotel record each and commit them.

    Returns:
        The list produced by calling ``faker_hotel`` once per person.
    """
    guests = faker_identifications(num)
    hotels = list(map(faker_hotel, guests))
    db.session.bulk_save_objects(hotels)
    # This commit also flushes the persons staged by faker_identifications.
    db.session.commit()
    return hotels
def faker_hotel(person: Person) -> Hotel:
    """Build an unsaved hotel-stay record for ``person``.

    A template is picked at random from ``faker_hotel_infos`` and its hotel
    fields are copied into a *new* ``Hotel``. Mutating and returning the
    template itself would make every generated record alias one of the few
    shared module-level objects, so later calls would overwrite the guest and
    dates of earlier ones.

    The check-in time is the current time shifted by a whole number of days
    between -10 and +10; check-out is three days after check-in.
    """
    template = random.choice(faker_hotel_infos)
    hotel = Hotel(
        hotel_code=template.hotel_code,
        hotel_name=template.hotel_name,
        locate_city_id=template.locate_city_id,
    )
    hotel.identification = person.identification
    hotel.in_data = datetime.today() + timedelta(days=random.randint(-10, 10))
    hotel.out_data = hotel.in_data + timedelta(days=3)
    return hotel
def faker_contacts(num_hotel: int, num_railway: int) -> dict:
    """Generate one patient plus hotel and railway contacts, and commit them.

    ``num_hotel + num_railway + 1`` persons are generated. The first is the
    patient, the next ``num_hotel`` share the patient's hotel stay and the
    remaining ``num_railway`` share the patient's train trip.

    Returns:
        ``{"patient": {"hotel": ..., "railway": ...},
        "contacts": {"hotel": [...], "railway": [...]}}`` with every record
        converted through ``utils.model``.
    """
    persons = faker_identifications(num_railway + num_hotel + 1)
    patient = persons[0]
    hotel_end = 1 + num_hotel
    contacts_hotel = persons[1:hotel_end]
    # Slice from the front: persons[-num_railway:] would return the whole
    # list (patient and hotel contacts included) when num_railway is 0.
    contacts_railway = persons[hotel_end:]

    hotel_patient = faker_hotel(patient)
    railway_patient = faker_railway(patient)

    def assemble_hotel(contact: Person) -> Hotel:
        """Clone the patient's stay and re-assign it to ``contact``."""
        hotel_contact = copy.deepcopy(hotel_patient)
        hotel_contact.identification = contact.identification
        return hotel_contact

    def assemble_railway(contact: Person) -> Railway:
        """Clone the patient's trip and re-assign it to ``contact``."""
        railway_contact = copy.deepcopy(railway_patient)
        railway_contact.identification = contact.identification
        railway_contact.name = contact.name
        # The clone starts with the patient's phone; replace it with the contact's.
        railway_contact.phone = contact.phone
        return railway_contact

    hotel_contacts = [assemble_hotel(contact) for contact in contacts_hotel]
    railway_contacts = [assemble_railway(contact) for contact in contacts_railway]

    db.session.bulk_save_objects([hotel_patient] + hotel_contacts)
    db.session.bulk_save_objects([railway_patient] + railway_contacts)
    # This commit also flushes the persons staged by faker_identifications.
    db.session.commit()

    return {
        "patient": {
            "hotel": utils.model.model2dict(hotel_patient),
            "railway": utils.model.model2dict(railway_patient)
        },
        "contacts": {
            "hotel": utils.model.models2dicts(hotel_contacts),
            "railway": utils.model.models2dicts(railway_contacts)
        }
    }
def query_patients() -> list[dict]:
    """Return, as dicts, the persons joined to both a Hotel and a Railway record by ID number."""
    query = db.session.query(Person)
    query = query.join(Hotel, Person.identification == Hotel.identification)
    query = query.join(Railway, Person.identification == Railway.identification)

    results = []
    for patient in query.all():
        results.append(patient.__to_dict__())
    return results
def query_contacts_hotel(identification: str) -> list[dict]:
    """Return the hotel contacts of the patient with the given ID number.

    A contact is another person with a stay in the same hotel (same
    ``hotel_code``) whose check-out is later than the patient's check-in.
    Each result merges the contact's ``Hotel`` and ``Person`` dicts.
    """
    patient_stay = Hotel.query.filter(Hotel.identification == identification).subquery()
    contacts = (
        db.session.query(Hotel, Person)
        .join(Hotel, Hotel.identification == Person.identification)
        .filter(Person.identification != identification)
        # Restrict to the patient's hotel; without this every guest of any
        # hotel with a late-enough check-out would be reported.
        .filter(Hotel.hotel_code == patient_stay.c.hotel_code)
        # NOTE(review): there is no upper bound (Hotel.in_data < patient out_data),
        # so guests arriving after the patient left still match - confirm intent.
        .filter(Hotel.out_data > patient_stay.c.in_data)
    )
    return [merge_object2dict(hotel, person) for hotel, person in contacts]
def query_contacts_railway(identification: str) -> list[dict]:
    """Return the railway contacts of the patient with the given ID number.

    A contact is another person with a record on the same train (same
    ``train``) and the same ``launch`` time as the patient. Each result
    merges the contact's ``Railway`` and ``Person`` dicts.
    """
    patient_trip = Railway.query.filter(Railway.identification == identification).subquery()
    contacts = (
        db.session.query(Railway, Person)
        .join(Railway, Railway.identification == Person.identification)
        .filter(Person.identification != identification)
        # Restrict to the patient's train; equal departure times alone would
        # also match passengers of a different train leaving at the same time.
        .filter(Railway.train == patient_trip.c.train)
        .filter(Railway.launch == patient_trip.c.launch)
    )
    return [merge_object2dict(railway, person) for railway, person in contacts]
def query_contacts_classify(identification: str) -> dict[str, list[dict]]:
    """Return the patient's contacts grouped under the keys ``'hotel'`` and ``'railway'``."""
    hotel_contacts = query_contacts_hotel(identification)
    railway_contacts = query_contacts_railway(identification)
    return {'hotel': hotel_contacts, 'railway': railway_contacts}
def query_contacts(identification: str) -> list[dict]:
    """Return the patient's hotel contacts followed by the railway contacts as one flat list."""
    combined = list(query_contacts_hotel(identification))
    combined.extend(query_contacts_railway(identification))
    return combined
def merge_object2dict(*objs: db.Model) -> dict:
    """Merge the ``__to_dict__()`` output of every model into one dict.

    Models are applied left to right, so on a key collision the value from
    the later argument wins.
    """
    merged = {}
    for model in objs:
        as_dict = model.__to_dict__()
        merged.update(as_dict)
    return merged