fix chunked prefill bugs and refactor
This commit is contained in:
@@ -22,8 +22,9 @@ class Sequence:
|
||||
self.last_token = token_ids[-1]
|
||||
self.num_tokens = len(self.token_ids)
|
||||
self.num_prompt_tokens = len(token_ids)
|
||||
self.num_cached_tokens = 0 # tokens that don't need prefill
|
||||
self.num_cached_tokens = 0
|
||||
self.num_scheduled_tokens = 0
|
||||
self.is_prefill = True
|
||||
self.block_table = []
|
||||
self.temperature = sampling_params.temperature
|
||||
self.max_tokens = sampling_params.max_tokens
|
||||
@@ -69,7 +70,7 @@ class Sequence:
|
||||
self.num_tokens += 1
|
||||
|
||||
def __getstate__(self):
|
||||
last_state = self.token_ids if self.num_completion_tokens == 0 or self.num_cached_tokens < self.num_tokens else self.last_token
|
||||
last_state = self.last_token if not self.is_prefill else self.token_ids
|
||||
return (self.num_tokens, self.num_prompt_tokens, self.num_cached_tokens, self.num_scheduled_tokens, self.block_table, last_state)
|
||||
|
||||
def __setstate__(self, state):
|
||||
|
||||
Reference in New Issue
Block a user