This topic was created 4104 days ago; the information mentioned may have changed or been developed since then.
# -*-coding: utf-8-*-
import re
import codecs
# Matches runs of word characters that are not ASCII letters, digits, or
# underscores -- chiefly runs of non-ASCII letters. re.UNICODE makes \W and
# \d follow the Unicode character database.
pattern = re.compile(
    r'[^a-zA-Z\W\d_]+',
    flags=re.UNICODE
)
# Scan the UTF-8 text file named 'file' line by line and print the first run
# matched by `pattern` on each line; lines without a match print nothing.
with codecs.open('file', 'r', 'utf8') as f:
    for line in f:
        # search() stops at the first match, so any later runs on the same
        # line are ignored.
        match = pattern.search(line)
        if match:
            # The parenthesised single-argument form is valid both as the
            # Python 2 print statement and as the Python 3 print function.
            print(match.group())