go 协程比 Python 多进程快好多! - V2EX
V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
请不要在回答技术问题时复制粘贴 AI 生成的内容
777777

go 协程比 Python 多进程快好多!

  •  
  •   777777 2023 年 12 月 13 日 1366 次点击
    这是一个创建于 857 天前的主题,其中的信息可能已经有所发展或是发生改变。

    需求:有 2 个 list(alist、blist),alist 的每个值都要与 blist 的每个值做字符串相似度计算,两个 list 的数量级均为 20 万。下面为 python 和 go 的代码片段。python:

    # Compute the similarity of two strings.
    def similar(a, b):
        """Return fuzz.token_set_ratio(a, b.lower()) / 100, or 0 if either is None.

        The score is also printed for every comparison.
        NOTE(review): this print runs once per compared pair; confirm it is
        still wanted outside of debugging.
        """
        if a is None or b is None:
            return 0
        similarity = fuzz.token_set_ratio(a, b.lower()) / 100
        print("similar a:", a, ",", "b:", b, ", similarity:", similarity)
        return similarity


    def compute_similarity(args):
        """Worker entry point: unpack (record_name, name) and score the pair.

        Returns (similarity, name) so the caller can tell which candidate the
        score belongs to.
        """
        record_name, name = args
        return similar(record_name, name), name


    # Update database records.
    def update_database(cursor, name_mapping, csv_data):
        """Set `factory` for every UUID whose name matches a CSV row's name.

        Args:
            cursor: object exposing ``executemany(sql, params)``.
            name_mapping: mapping of uuid -> iterable of names; None or empty
                names are ignored.
            csv_data: iterable of dict-like rows read via ``row.get("vendor")``
                and ``row.get("name")``.

        Returns:
            The number of (vendor, uuid) pairs passed to ``executemany``
            (0 when there is nothing to update).
        """
        update_sql = "UPDATE tweb_fingerprint_test SET factory = %s WHERE uuid = %s"

        # Build a reverse mapping so UUIDs can be looked up by name quickly.
        name_to_uuid = defaultdict(list)
        for uuid, names in name_mapping.items():
            for name in names:
                if name:  # skip None or empty names
                    name_to_uuid[name].append(uuid)

        # The candidate names do not change while rows are processed, so
        # materialise them once instead of once per row.
        candidate_names = list(name_to_uuid)

        updates = []
        with ProcessPoolExecutor() as executor:
            for row in csv_data:
                vendor_name = row.get("vendor")
                record_name = row.get("name")
                print("record_name:", record_name)
                if (
                    vendor_name is None
                    or record_name is None
                    or vendor_name in ["未知", "None"]
                ):
                    continue  # skip this row

                # Exact name lookup first; copy so the mapping's own list is
                # never mutated.
                uuids_to_update = list(name_to_uuid.get(record_name, []))

                # No exact match: fall back to names scoring above 98%.
                if not uuids_to_update:
                    tasks = [(record_name, name) for name in candidate_names]
                    # chunksize batches tasks per worker round trip; the
                    # default of 1 pays IPC overhead for every comparison.
                    results = executor.map(compute_similarity, tasks, chunksize=1024)
                    for similarity, name in results:
                        if similarity > 0.98:
                            # Each name maps to a *list* of UUIDs; extend with
                            # its elements so every update row carries a
                            # single UUID rather than a nested list.
                            uuids_to_update.extend(name_to_uuid[name])

                # Queue one update per matched UUID.
                for uuid_to_update in uuids_to_update:
                    updates.append((vendor_name, uuid_to_update))

        # Batch update.
        if updates:
            cursor.executemany(update_sql, updates)

        # Number of queued updates.
        return len(updates)

    go:

    // Similar calculates the similarity between two strings func Similar(a, b string) float64 { return smetrics.JaroWinkler(a, b, 0.7, 4) } // UpdateDatabase updates the database with the new vendor information // UpdateDatabase updates the database with the new vendor information func UpdateDatabase(db *sql.DB, vendors map[string]Vendor, records []CSVRecord) (int, error) { fmt.Println("records", len(records)) fmt.Println("vendors", len(vendors)) stmt, err := db.Prepare("UPDATE tweb_fingerprint SET factory = ? WHERE uuid = ?") if err != nil { return 0, err } defer stmt.Close() var wg sync.WaitGroup updates := make(chan Updatedata, len(records)) for _, record := range records { wg.Add(1) go func(record CSVRecord) { defer wg.Done() // fmt.Println(record.Name) for _, vendor := range vendors { if record.Name == vendor.Name.String || Similar(record.Name, vendor.Name.String) > SimilarityThreshold { updates <- Updatedata{ UUID: vendor.UUI, Factory: record.Vendor, } } } }(record) } go func() { wg.Wait() close(updates) }() count := 0 for update := range updates { fmt.Println("update:", update) if _, err := stmt.Exec(update.Factory, update.UUID); err != nil { return count, err } count++ } return count, nil } 
    Baloneo
        1
    Baloneo  
       2023 年 12 月 13 日
    快多少?
    777777
        2
    777777  
    OP
       2023 年 12 月 13 日
    @Baloneo 至少 10 倍吧,python CPU 都打不满,没跑完我就重构成 go 了,go 十分钟就跑完了
    关于     帮助文档     自助推广系统     博客     API     FAQ     Solana     975 人在线   最高记录 6679       Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 30ms UTC 19:10 PVG 03:10 LAX 12:10 JFK 15:10
    Do have faith in what you're doing.
    ubao msn snddm index pchome yahoo rakuten mypaper meadowduck bidyahoo youbao zxmzxm asda bnvcg cvbfg dfscv mmhjk xxddc yybgb zznbn ccubao uaitu acv GXCV ET GDG YH FG BCVB FJFH CBRE CBC GDG ET54 WRWR RWER WREW WRWER RWER SDG EW SF DSFSF fbbs ubao fhd dfg ewr dg df ewwr ewwr et ruyut utut dfg fgd gdfgt etg dfgt dfgd ert4 gd fgg wr 235 wer3 we vsdf sdf gdf ert xcv sdf rwer hfd dfg cvb rwf afb dfh jgh bmn lgh rty gfds cxv xcv xcs vdas fdf fgd cv sdf tert sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf shasha9178 shasha9178 shasha9178 shasha9178 shasha9178 liflif2 liflif2 liflif2 liflif2 liflif2 liblib3 liblib3 liblib3 liblib3 liblib3 zhazha444 zhazha444 zhazha444 zhazha444 zhazha444 dende5 dende denden denden2 denden21 fenfen9 fenf619 fen619 fenfe9 fe619 sdf sdf sdf sdf sdf zhazh90 zhazh0 zhaa50 zha90 zh590 zho zhoz zhozh zhozho zhozho2 lislis lls95 lili95 lils5 liss9 sdf0ty987 sdft876 sdft9876 sdf09876 sd0t9876 sdf0ty98 sdf0976 sdf0ty986 sdf0ty96 sdf0t76 sdf0876 df0ty98 sf0t876 sd0ty76 sdy76 sdf76 sdf0t76 sdf0ty9 sdf0ty98 sdf0ty987 sdf0ty98 sdf6676 sdf876 sd876 sd876 sdf6 sdf6 sdf9876 sdf0t sdf06 sdf0ty9776 sdf0ty9776 sdf0ty76 sdf8876 sdf0t sd6 sdf06 s688876 sd688 sdf86