python处理数据的导出到Excel

发布时间 2023-11-20 14:41:02作者: 爱家家的卡卡
import datetime
import json
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# 数据库连接配置,请根据你的实际情况修改
db_config = {
    'host': 'your_database_host',
    'user': 'your_database_user',
    'password': 'your_database_password',
    'database': 'your_database_name'
}

# 使用 SQLAlchemy 创建数据库连接,并获取 Connection 对象
engine = create_engine (
    f"mysql+mysqlconnector://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}")
connection = engine.connect ()

# 查询onboard_companies表中status等于1的数据
companies_query = text ("SELECT id,ap_cid,company_name FROM onboard_companies WHERE status = 1")
# 尝试读取数据到DataFrame
companies_df = pd.read_sql (companies_query, connection)
# print(companies_df)

# 查询labels表中status等于2的数据
labels_query = text ("SELECT id,name FROM labels WHERE status = 2 AND id IN (4,9)")
# 尝试读取数据到DataFrame
labels_df = pd.read_sql (labels_query, connection)

# 查询label_fields表中与labels表关联的数据
label_fields_query = text ("""
    SELECT lf.id, lf.label_id, lf.name, lf.type, lf.field_type,lf.is_required, lf.options
    FROM label_fields lf
    INNER JOIN labels l ON lf.label_id = l.id
    WHERE l.status = 2 AND l.id IN (4,9) AND lf.status = 1 AND lf.type != 7
""")
# 尝试读取数据到DataFrame
label_fields_df = pd.read_sql (label_fields_query, connection)

# 关闭数据库连接
connection.close ()

# 转换type和is_required的值
type_mapping = {
    1: "Single-select dropdown list (Self-defined)",
    2: "Single-select dropdown list (Company ID)",
    3: "Multi-select dropdown list (Self-defined)",
    4: "Multi-select dropdown list (Company ID)",
    5: "Multi-select dropdown list (Trade Code)",
    6: "Calendar",
    8: "Free text",
}

field_type_mapping = {
    1: "dropdown",
    2: "checkbox",
    3: "textarea",
    4: "date",
    5: "file",
    6: "multidropdow",
}

is_required_mapping = {
    0: "可选",
    1: "必填",
}


# 处理options列
def process_options(options):
    if pd.notna (options):
        options_list = json.loads (options)
        options_str = ",".join (options_list)
        return options_str
    return None


label_fields_df['type'] = label_fields_df['type'].map (type_mapping)
label_fields_df['field_type'] = label_fields_df['field_type'].map (field_type_mapping)
label_fields_df['is_required'] = label_fields_df['is_required'].map (is_required_mapping)
label_fields_df['options'] = label_fields_df['options'].apply (process_options)

# 合并数据
merged_df = pd.merge (labels_df, label_fields_df, left_on='id', right_on='label_id')

# 选择需要的列
result_df = merged_df[['id_x', 'name_x', 'id_y', 'name_y', 'type', 'field_type', 'is_required', 'options']]
# print(result_df)

# 创建一个新的DataFrame来存储组合后的结果
combined_data = pd.DataFrame ()
# 使用嵌套的for循环组合数据
for _, company_row in companies_df.iterrows ():
    for _, result_row in result_df.iterrows ():
        # 创建一个新的行,包含公司数据和结果数据
        new_row = {
            'company_id': company_row['id'],
            'company_ap_cid': company_row['ap_cid'],
            'company_name': company_row['company_name'],
            'result_id_x': result_row['id_x'],
            'result_name_x': result_row['name_x'],
            'result_id_y': result_row['id_y'],
            'result_name_y': result_row['name_y'],
            'result_type': result_row['type'],
            'result_field_type': result_row['field_type'],
            'result_is_required': result_row['is_required'],
            'result_options': result_row['options'],
            'input_options': ''
        }
        # 将新行添加到组合后的数据框中
        combined_data = pd.concat ([combined_data, pd.DataFrame ([new_row])], ignore_index=True)

# 打印结果
# print(combined_data)

combined_data.columns = ['Company Id', 'Company AP ID', 'Company Name', 'Label Id', 'Label Name', 'Label Field Id',
                         'Label Field Name', 'Label Field Type', 'Label Field Val Type', 'is Required', 'Optional options', 'Input options']
# print(combined_data)

# 获取当前日期和时间
current_datetime = datetime.datetime.now ()

# 把日期和事件格式化为字符串
formatted_datetime = current_datetime.strftime ("%Y%m%d_%H%M%S")

# 在当前文件夹下的 "outFiles" 文件夹中保存 Excel 文件
output_folder = "outFiles"

# 将格式化的日期时间与所需的文件名连接起来
output_file_name = f"outPutLabel_{formatted_datetime}.xlsx"

# 如果 "outFiles" 文件夹不存在,则创建它
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# 拼接输出文件的完整路径
output_file_path = os.path.join(output_folder, output_file_name)

# 将数据框架写入 Excel 文件
combined_data.to_excel (output_file_path, index=False)

print(f"Excel 文件已保存到 {output_file_path}")