"""Generate fake ``Person`` records and random train identifiers."""
import random
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from lxml import etree
from faker import Faker

from model import SexEnum, Person
from base import db

# Module-level pool used by ``faker_identifications`` to run lookups
# concurrently.
executor = ThreadPoolExecutor(max_workers=10)
faker = Faker(["zh_CN"])
# Candidate prefixes for generated train ids; the empty string produces an
# id that consists of the number only.
train_prefix = ['T', 'K', 'D', 'G', 'L', '']

# Upper bound (seconds) for the identification lookup request, so a stalled
# server cannot block a pool worker indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def faker_identification() -> Person:
    """Build one fake ``Person`` from a generated identification number.

    A number is generated with ``faker.ssn(max_age=70)`` and looked up on
    shenfenzheng.bmcx.com; the text cells of the returned HTML table supply
    the identification, address, sex and age fields.  Name and phone number
    are generated locally with Faker.

    Returns:
        A new ``Person`` instance (not added to any database session).

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        IndexError: if the page does not contain the expected table cells.
    """
    identification = faker.ssn(max_age=70)
    resp = requests.get(
        f"https://shenfenzheng.bmcx.com/{identification}__shenfenzheng/",
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    html = etree.HTML(resp.text)
    # Text nodes of the white-background cells in every row except the
    # second one of the full-width table.
    infos = html.xpath(
        '//table[@width="100%"]//tr[position()!=2]/td[@bgcolor="#FFFFFF"]//text()'
    )

    # The first character of the sex cell is what SexEnum is keyed on.
    sex_char = infos[3][0]
    if sex_char == SexEnum.MALE.sex:
        first_name = faker.first_name_male()
    else:
        first_name = faker.first_name_female()

    return Person(
        identification=infos[0],
        address=infos[1],
        sex=SexEnum(sex_char).value,
        name=faker.last_name() + first_name,
        # Drop the last two characters of the age cell
        # (presumably a unit suffix -- TODO confirm against the page).
        age=infos[4][:-2],
        phone=faker.phone_number(),
    )


def time(times):
    """Return ``times`` unchanged.

    NOTE(review): this name coincides with the standard-library ``time``
    module; kept as-is so existing callers continue to work.
    """
    return times


def faker_identifications(num: int) -> list[Person]:
    """Create ``num`` fake persons concurrently on the module thread pool.

    Results are collected with ``as_completed``, so the returned list is in
    completion order rather than submission order.  An exception raised by
    any lookup propagates from ``future.result()``.
    """
    all_tasks = [executor.submit(faker_identification) for _ in range(num)]
    return [future.result() for future in as_completed(all_tasks)]


def store_faker_identification(num: int) -> list[Person]:
    """Create ``num`` fake persons, add them to ``db.session`` and commit.

    Returns:
        The list of persons that were added to the session.
    """
    persons = faker_identifications(num)
    db.session.add_all(persons)
    db.session.commit()
    return persons


def faker_train(num: int) -> list[str]:
    """Return a list of ``num`` randomly generated train ids."""
    return [generate_train_id() for _ in range(num)]


def generate_train_id():
    """Return a random train id: a prefix from ``train_prefix`` + a number.

    The number is drawn uniformly from 0..9999 and is not zero-padded.
    """
    prefix = random.choice(train_prefix)
    return prefix + str(random.randrange(10000))