写入数据过多无法导出结果怎么办
有9万多条数据 想获得最后的结果
import pandas as pd
# Step 1: 读取Excel文件
df = xl("$A:$N", headers=True, sheet_name="工作表1")
# Step 2: 确保 'DATE' 列为日期类型,并按日期排序
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce') # 确保日期格式正确
df = df.sort_values(by='DATE')
# Step 3: 初始化字典来跟踪账户的余额
id_balances = {}
# Step 4: 添加新的'表单类型'列,提前分配
df['表单类型'] = None
# Step 5: 定义一个函数来进行标注
def classify_transaction(row, id_balances):
account_id = row['转出方一代账户ID']
amount = row['转账现金']
# 初始化账户的余额
if account_id not in id_balances:
id_balances[account_id] = {'续费余额': 0, '新单余额': 0, 'had_new_order': False}
# 获取账户的状态
balance = id_balances[account_id]
if amount > 0:
# 如果没有新单,第一笔加款标记为新单
if not balance['had_new_order']:
id_balances[account_id]['新单余额'] += amount
id_balances[account_id]['had_new_order'] = True
return '新单'
else:
# 否则标记为续费
id_balances[account_id]['续费余额'] += amount
return '续费'
else:
# 处理退款
if balance['续费余额'] + amount >= 0:
id_balances[account_id]['续费余额'] += amount
return '续费'
elif balance['续费余额'] > 0:
renew_deficit = abs(balance['续费余额'] + amount)
if renew_deficit <= balance['新单余额']:
id_balances[account_id]['续费余额'] = 0
id_balances[account_id]['新单余额'] -= renew_deficit
return f'续费({balance["续费余额"]}), 新单({renew_deficit})'
else:
return '异常'
elif balance['续费余额'] == 0 and balance['新单余额'] >= abs(amount):
id_balances[account_id]['新单余额'] += amount
return '新单'
else:
return '异常'
# Step 6: 使用 apply 方法来向量化操作,避免循环
df['表单类型'] = df.apply(lambda row: classify_transaction(row, id_balances), axis=1)
# 显示结果
print(df)
@金山办公