我想找到两个相似的字符串。请看下面的示例:
# Compare three strings pairwise with fuzzywuzzy's partial_ratio and with
# difflib.SequenceMatcher.  The trailing "# >" comments are the outputs
# reported in the original post; they are not re-verified here.
import difflib  # the original snippet used difflib without importing it

from fuzzywuzzy import fuzz

string1 = 'Green apple'
string2 = 'Apple, green'
string3 = 'Green apples - grow on trees'

# Test with Fuzzy Wuzzy
print(fuzz.partial_ratio(string1, string2))  # > 50
print(fuzz.partial_ratio(string1, string3))  # > 100
print(fuzz.partial_ratio(string2, string3))  # > 58

# Testing with DiffLib SequenceMatcher
print(difflib.SequenceMatcher(None, string1, string2).ratio())  # > 0.34782608695652173
print(difflib.SequenceMatcher(None, string1, string3).ratio())  # > 0.5641025641025641
print(difflib.SequenceMatcher(None, string2, string3).ratio())  # > 0.45
在 fuzzywuzzy 中还有另一个方法叫做 partial_token_set_ratio。我想这能解决你的问题:
# Demonstrates fuzz.partial_token_set_ratio on the same sample strings.
# The original was an interactive-session paste with the results fused into
# the code; here each score is printed and the value reported in the post is
# kept as a trailing comment (not re-verified).
from fuzzywuzzy import fuzz

string1 = 'Green apple'
string2 = 'Apple, green'
string3 = 'Green apples - grow on trees'

print(fuzz.partial_token_set_ratio(string1, string3))  # 100
print(fuzz.partial_token_set_ratio(string1, string2))  # 100

# A single word taken from string1, compared in both argument orders.
string4 = "apple"
print(fuzz.partial_token_set_ratio(string1, string4))  # 100
print(fuzz.partial_token_set_ratio(string4, string1))  # 100

# A prefix of that word.
string4 = "app"
print(fuzz.partial_token_set_ratio(string4, string1))  # 100

# A misspelling of that word.
string4 = "appld"
print(fuzz.partial_token_set_ratio(string4, string1))  # 80
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Maps a state / territory / armed-forces region name to its two-letter code.
# The keys are used as the candidate list for fuzzy matching below.
state_to_code = {
    "VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
    "KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
    "NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
    "FEDERATED STATES OF MICRONESIA": "FM",
    "Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
    "Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
    "MASSACHUSETTS": "MA",
    "PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
    "NEW MEXICO": "NM",
    "MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
    "Armed Forces Middle East": "AE",
    "NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
    "MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
    "WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
    "LOUISIANA": "LA",
    "NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
    "NORTH DAKOTA": "ND",
    "Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
    "RHODE ISLAND": "RI",
    "DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
}


def studyfuzzy():
    """Run the sample ``process.extractOne`` lookups against the state names.

    Two misspelled queries ("Minnesotta" and "AlaBAMMazzz") are matched
    against the keys of ``state_to_code`` with and without ``score_cutoff``.

    Returns:
        A list with the five ``extractOne`` results, in call order.  The
        original function discarded them (and returned ``None``).
        NOTE(review): the session shown in the post suggests results like
        ``('MINNESOTA', 95)`` / ``('ALABAMA', 78)`` and no result when the
        cutoff is above the best score -- confirm against the installed
        fuzzywuzzy version.
    """
    # Pass the keys explicitly: the names are the candidates, not the codes.
    choices = state_to_code.keys()

    # The original also called state_to_code.viewkeys() / viewvalues() /
    # viewitems() and discarded the results.  Those methods exist only on
    # Python 2 and raise AttributeError on Python 3, where keys() / values()
    # / items() already return views, so the calls were dropped.
    return [
        process.extractOne("Minnesotta", choices=choices),
        process.extractOne("Minnesotta", choices=choices, score_cutoff=80),
        process.extractOne("Minnesotta", choices=choices, score_cutoff=96),
        process.extractOne("AlaBAMMazzz", choices=choices, score_cutoff=80),
        process.extractOne("AlaBAMMazzz", choices=choices),
    ]
In[6]: from fuzzywuzzy import fuzz
In[7]: from fuzzywuzzy import process
In[8]: state_to_code = {"VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
                        "KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
                        "NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
                        "FEDERATED STATES OF MICRONESIA": "FM",
                        "Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
                        "Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
                        "MASSACHUSETTS": "MA",
                        "PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
                        "NEW MEXICO": "NM",
                        "MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
                        "Armed Forces Middle East": "AE",
                        "NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
                        "MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
                        "WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
                        "LOUISIANA": "LA",
                        "NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
                        "NORTH DAKOTA": "ND",
                        "Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
                        "RHODE ISLAND": "RI",
                        "DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
                        }
Out[19]: ('MINNESOTA', 95)
In[20]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=80)
Out[20]: ('MINNESOTA', 95)
In[21]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=96)
In[22]: process.extractOne("AlaBAMMazzz", choices=state_to_code.keys(), score_cutoff=80)
In[23]: process.extractOne("AlaBAMMazzz",choices=state_to_code.keys())
Out[23]: ('ALABAMA', 78)
转载:https://www.cnpython.com/qa/522980