"""Assign a random class id to each distinct name in the input file.

Reads /home/hadoop/SparkProgram/chapter4-data01.txt (comma-separated
records, three fields per record, name in the first field), draws one
random class id per distinct name, and writes the (name, class id)
pairs to /home/hadoop/SparkProgram/class.csv.
"""
import random

import pandas as pd

INPUT_PATH = r"/home/hadoop/SparkProgram/chapter4-data01.txt"
OUTPUT_PATH = "/home/hadoop/SparkProgram/class.csv"

# Context manager guarantees the handle is closed even if read() raises
# (the original used an unguarded file.close() and shadowed the builtin
# name `file`).
with open(INPUT_PATH, "r", encoding="utf-8") as fh:
    raw = fh.read()

# Flatten newline-separated records into one comma-separated field list.
# strip() first so a trailing newline does not produce a spurious empty
# field (which could otherwise surface as an empty name in the output).
fields = raw.strip().replace("\n", ",").split(",")

# Each record has three fields; the name is the first of every triple.
# Build the distinct-name list once instead of materializing set(names)
# twice as the original did.
unique_names = list(set(fields[::3]))

# One random class id per distinct name.
# NOTE(review): randint(1, 11) combined with the "class0" prefix yields
# "class01" .. "class011" — ids 10 and 11 are padded inconsistently.
# Confirm whether randint(1, 10) or zero-padded ids were intended;
# original behavior is kept here.
class_ids = ["class0" + str(random.randint(1, 11)) for _ in unique_names]

# Names become the index and the class ids the single (unnamed) data
# column — same layout as the original positional DataFrame(data, index)
# call, spelled with an explicit keyword for clarity.
dataframe = pd.DataFrame(class_ids, index=unique_names)
dataframe.to_csv(OUTPUT_PATH, encoding="utf-8")
print("successful")