Code : Tout sélectionner
Func _CleanVille($str)
$str = _LowerUnaccent($str)
$str = StringReplace($str, "/", " sur ")
$str = StringRegExpReplace($str, "[0-9\-_/\^""\.:;,'\(\)\*\+#~`&\|!\?§%\$£¤={}²€\[\]°@]|\s", " ")
$str = StringStripWS($str, 7)
$str = StringRegExpReplace($str, "\b(sa?inte?(?!$))\b", "st")
Return($str)
EndFunc
Func _TokenizeVille($ville, $pays, ByRef $words)
$ville = StringRegExpReplace($ville, '\bste? ', 'st~')
Local $allwords = StringSplit($ville, ' ', 2)
$words = $allwords
Local $j = 0
For $i = 0 To UBound($allwords) - 1
If (($pays = 'FR' Or $pays = 'BE' Or $pays = 'CH' Or $pays = 'LU' Or $pays = 'GP' Or $pays = 'MQ' Or $pays = 'RE' Or $pays = 'NC' Or $pays = 'ES' Or $pays = 'GF' Or $pays = 'AD') And _
Not ($ville = 'les pres' And $i = 1) And _
_Sl3Exists($hADB, "select mot from mots where not ("&$i&" = 0 and flgville) and flgStopAdr and mot = "&X($allwords[$i])&";")) _
Or (StringLen($allwords[$i]) = 1 And Not $ville = 'y') Then
_ArrayDelete($words, $j)
Else
$j += 1
EndIf
Next
Return($j)
EndFunc
Func _TrouvePaysCpVille($paysIn, $cpIn, $villeIn, ByRef $ZipCandidats)
Local $nrows, $ncols
Local $villeTok, $delta, $method
Local $ret
$villeIn = _CleanVille($villeIn)
; Tests effectués séquentiellement, autorisant une réorganisation ultérieure aisée pour optimisation en pratique
Local $villeInUC = _Upper($villeIn)
$cpIn = _Upper($cpIn)
$ZipCandidats = ''
_SQLite_Exec($hADB, "begin immediate;")
; 1er essai : si tout est trouvé exactement et sans recherches tortueuses, bingo !
$method = 1
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", 100.0, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&" and ville = "&X($villeInUC)&";")
If _SQLite_Changes($hADB) = 1 Then
_SQLite_GetData2d($hADB, "select score, pays, code, ville, id from mdb.ZipMatches;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
Return
EndIf
; 2nd essai : on recherche le pays, le code postal et la suite des tokens de la ville
$method += 1
Local $n = _TokenizeVille($villeIn, $paysIn, $villeTok)
Local $tokstr = _ArrayToString($villeTok, ' ')
_ConsoleWrite('Tokenization is >' & $tokstr & '<' & @LF)
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", 99.0, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&" and motsville = "&X($tokstr)&";")
If _SQLite_Changes($hADB) = 1 Then
_SQLite_GetData2d($hADB, "select score, pays, code, ville, id from mdb.ZipMatches;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
Return
EndIf
; 3ème essai : on recherche le pays, on admet 1 erreur sur le code postal et on compare sur la suite des tokens de la ville
$method += 1
If $villeIn <> '' Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(995 * (1.0 - typos(code, "&X($cpIn)&") / ("&StringLen($cpin)&" * 3.0)) as integer) * 0.1, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and typos(code, "&X($cpIn)&") <= 1 and motsville = "&X($tokstr)&";")
EndIf
If _SQLite_Changes($hADB) = 1 Then
_SQLite_GetData2d($hADB, "select score, pays, code, ville, id from mdb.ZipMatches;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
Return
EndIf
; 4ème essai : on recherche le pays, le code postal et on essaye de faire coller les tokens de la ville et eux seulement
$method += 1
If $villeIn <> '' Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(989 * (1.0 - typos(code, "&X($cpIn)&") / ("&StringLen($cpin)&" * 3.0)) as integer) * 0.1, " & _
"paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&" and (motsville like "&X($tokstr & ' %')&" or motsville like "&X('% ' & $tokstr)&");")
EndIf
If _SQLite_Changes($hADB) = 1 Then
_SQLite_GetData2d($hADB, "select score, pays, code, ville, id from mdb.ZipMatches;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
Return
EndIf
; 5ème essai : on recherche le pays, le code postal et on essaye de faire coller les tokens connus et eux seulement
$method += 1
If $villeIn <> '' Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(987 * (1.0 - typos(code, "&X($cpIn)&") / ("&StringLen($cpin)&" * 3.0)) as integer) * 0.1, " & _
"paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&" and "&X($tokstr)&" like (motsville || ' %');")
EndIf
If _SQLite_Changes($hADB) = 1 Then
_SQLite_GetData2d($hADB, "select score, pays, code, ville, id from mdb.ZipMatches;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
Return
EndIf
; 6ème essai : on recherche le pays et on admet 2 erreurs sur le code postal et aucune sur les tokens de la ville
$method += 1
If $villeIn <> '' Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(985 * (1.0 - (typos(code, "&X($cpIn)&") / "&StringLen($cpin)&" - typos(ville, "&X($villeIn)&") / "&StringLen($villeIn)&") * 3.0) as integer) * 0.1, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and typos(code, "&X($cpIn)&") <= 2 and motsville = "&X($tokstr)&";")
EndIf
; 7ème essai : on recherche le pays et on admet 1 erreur sur le code postal et 1 erreur sur les tokens de la ville
$method += 1
If $villeIn <> '' Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(980 * (1.0 - (typos(code, "&X($cpIn)&") / "&StringLen($cpin)&" - typos(ville, "&X($villeIn)&") / "&StringLen($villeIn)&") * 3.0) as integer) * 0.1, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and typos(code, "&X($cpIn)&") <= 1 and typos(motsville, "&X($tokstr)&") <= 1;")
EndIf
; 8ème essai : on recherche le pays, le code postal et on admet de 1 à 3 erreurs sur la ville selon longueur
; si la ville est vide, on ramène tous les enregistrements de ce code postal pour ce pays
$method += 1
Switch StringLen($villeIn)
Case 0
$delta = -1
Case 1 To 2
$delta = 0
Case 3 To 6
$delta = 1
Case 7 To 10
$delta = 2
Case Else
$delta = 3
EndSwitch
If $delta < 0 Then
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", 95.0, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&";")
Else
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(1000 * (1.0 - typos(ville, "&X($villeIn)&") / ("&StringLen($villeIn)&" * 3.0)) as integer) * 0.1, paysIso, code, ville, id from codespostaux " & _
"where paysIso = "&X($paysIn)&" and code = "&X($cpIn)&" and typos(ville, "&X($villeIn)&") <= "&$delta&";")
EndIf
; 9ème essai : on recherche la ville en admettant 1 à 3 erreurs selon longueur et le code en espérant que le pays "remonte"
$method += 1
If $villeIn <> '' Then
Switch StringLen($villeIn)
Case 1 To 2
$delta = 0
Case 3 To 6
$delta = 1
Case 7 To 10
$delta = 2
Case Else
$delta = 3
EndSwitch
_SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) " & _
"select "&$Method&", cast(1000 * ((1.0 - cast(typos(ville, "&X($villeIn)&") as real) / "&StringLen($villeIn)&" + case code when "&X($cpIn)&" then 1 else 0 end + case paysiso when "&X($paysIn)&" then 1 else 0 end) / 3.0) as integer) * 0.1, paysIso, code, ville, id from codespostaux " & _
"where code = "&X($cpIn)&" and typos(ville, "&X($villeIn)&") <= "&$delta&";")
EndIf
;~ ; 10ème essai : on recherche les tokens de la ville, le début du code sans les trois derniers caractères et le pays
;~ $method += 1
;~ If $n Then
;~ For $i = 0 to $n - 1
;~ If StringLen($villeTok[$i]) <= 5 Then
;~ $delta = 1
;~ Else
;~ $delta = 2
;~ EndIf
;~ _SQLite_Exec($hADB, "insert into mdb.tokmatch (idzip, nbtypos, howclose) " & _
;~ "select zipid, " & _
;~ "typos(zipmot, "&X($villeTok[$i])&") as typ, " & _
;~ "1.0 - cast(typos(zipmot, "&X($villeTok[$i])&") as Real) / "&StringLen($villeTok[$i])&" " & _
;~ "from zipmots where typ <= "&$delta&";")
;~ Next
;~ _SQLite_Exec($hADB, "insert into mdb.zipmatch (id_zip, samepays, ziptypos) " & _
;~ "select distinct idzip, " & _
;~ "case paysiso when "&X($paysIn)&" then 1 else 0 end, " & _
;~ "typos(code, "&X($cpIn)&") from mdb.tokmatch join codespostaux on idzip = id " & _
;~ "where idzip in (select distinct idzip from mdb.tokmatch);" & _
;~ "update mdb.zipmatch set " & _
;~ "nbtoks = (select count(*) from mdb.tokmatch where idzip = id_zip), " & _
;~ "toktypos = (select total(nbtypos) from mdb.tokmatch where idzip = id_zip), " & _
;~ "zipclose = (select avg(howclose) from mdb.tokmatch where idzip = id_zip);" & _
;~ "update mdb.zipmatch set score = cast(((150.0 * samepays) + 150.0 * (1.0 - cast(ziptypos as real) / "&StringLen($cpin)&") + 700.0 * nbtoks * zipclose / "&$n&") as integer) * 0.1;")
;~ _SQLite_Exec($hADB, "insert into mdb.ZipMatches (method, score, pays, code, ville, id) select "&$Method&", score - 0.29 * typos(ville, "&X($villeIn)&"), paysIso, code, ville, id from mdb.zipmatch join codespostaux on id_zip = id group by id_zip;")
;~ EndIf
; ici on récupère tout ce qui a été pêché, on trie par score et on garde les 100 meilleurs résultats
_SQLite_GetData2d($hADB, "select distinct score, pays, code, ville, id from mdb.ZipMatches group by id order by score desc, code, ville limit 100;", $ZipCandidats, $nrows, $ncols)
_SQLite_Exec($hADB, "delete from mdb.tokmatch; delete from mdb.zipmatch; delete from mdb.zipmatches;")
_SQLite_Exec($hADB, 'commit;')
EndFunc