Преглед изворни кода

:recycle: removed false positives from prescore matrix by swapping in Euclidean distance for cosine similarity

tags/0.0.1^2
juancarbajal98 пре 3 година
родитељ
комит
9e681c8405

+ 2
- 2
backend/db/data-generator/config.json Прегледај датотеку

@@ -1,10 +1,10 @@
1 1
 {
2 2
     "mockOutputPath": "./db/generated",
3
-    "magic": 10000,
3
+    "magic": 1000,
4 4
     "total": 100,
5 5
     "batchSize": 10,
6 6
     "percentageOfSeekers": 90,
7
-    "scoreVals": [107, 414, 721, 1028, 1336, 1648, 2056],
7
+    "scoreVals": [1, 2, 3, 4, 5, 6, 7],
8 8
     "header": "/**\n* GENERATED MOCK SIIMEE DATA\n*/",
9 9
     "possibleZipcodes": [
10 10
         "90012",

+ 41
- 21
backend/db/data-generator/generate.ipynb Прегледај датотеку

@@ -2,7 +2,7 @@
2 2
  "cells": [
3 3
   {
4 4
    "cell_type": "code",
5
-   "execution_count": 15,
5
+   "execution_count": 10,
6 6
    "metadata": {},
7 7
    "outputs": [
8 8
     {
@@ -10,23 +10,26 @@
10 10
      "output_type": "stream",
11 11
      "text": [
12 12
       "\n",
13
-      "Scoring: ((107, 107), (1336, 1336))\n",
14
-      "10000.0\n",
13
+      "Scoring: ((1, 1), (7, 7))\n",
14
+      "1515\n",
15 15
       "\n",
16
-      "Scoring: ((414, 107), (414, 414))\n",
17
-      "8616.0\n",
16
+      "Scoring: ((2, 1), (2, 2))\n",
17
+      "9000\n",
18 18
       "\n",
19
-      "Scoring: ((414, 2056), (1648, 721))\n",
20
-      "5738.0\n",
19
+      "Scoring: ((2, 7), (6, 3))\n",
20
+      "4343\n",
21 21
       "\n",
22
-      "Scoring: ((1648, 721), (107, 414))\n",
23
-      "6173.0\n",
22
+      "Scoring: ((6, 3), (1, 2))\n",
23
+      "4901\n",
24 24
       "\n",
25
-      "Scoring: ((1336, 1648), (1028, 1648))\n",
26
-      "9924.0\n",
25
+      "Scoring: ((5, 6), (4, 6))\n",
26
+      "9000\n",
27 27
       "\n",
28
-      "Scoring: ((107, 414), (721, 721))\n",
29
-      "8616.0\n"
28
+      "Scoring: ((1, 2), (3, 3))\n",
29
+      "7764\n",
30
+      "\n",
31
+      "Scoring: ((1, 1), (1, 1))\n",
32
+      "10000\n"
30 33
      ]
31 34
     }
32 35
    ],
@@ -35,6 +38,8 @@
35 38
     "import copy\n",
36 39
     "from itertools import product\n",
37 40
     "from scipy import spatial\n",
41
+    "import math\n",
42
+    "import numpy\n",
38 43
     "\n",
39 44
     " \n",
40 45
     "f = open('./config.json')\n",
@@ -47,6 +52,11 @@
47 52
     "config[\"file\"] = \"../generated/prescore_matrix.json\"\n",
48 53
     "config[\"delimiter\"] = \"-\"\n",
49 54
     "config[\"version\"] = \"0.1.0\"\n",
55
+    "# max distance = distance between top left and bottom right indices in matrix\n",
56
+    "config[\"max_euclidean_distance\"] =  math.sqrt(\n",
57
+    "    math.pow(len(config[\"all_possible_responses\"])-1, 2) +\n",
58
+    "    math.pow(len(config[\"all_possible_responses\"])-1, 2)\n",
59
+    "    ) \n",
50 60
     "f.close()\n",
51 61
     "\n",
52 62
     "def createPermutations(possibilities, size):\n",
@@ -79,13 +89,19 @@
79 89
     "    b = aspect_ab[1]\n",
80 90
     "    return (1 - spatial.distance.cosine(a,b)) * config[\"magic\"]\n",
81 91
     "\n",
92
+    "def euclideanDistance(aspect_ab):\n",
93
+    "    a = numpy.array(aspect_ab[0])\n",
94
+    "    b = numpy.array(aspect_ab[1])\n",
95
+    "    return numpy.linalg.norm(a - b)\n",
82 96
     "\n",
83 97
     "def prescore_matrix_from(vals):\n",
84 98
     "    m = {}\n",
85 99
     "    for val in vals:\n",
86 100
     "        m[val] = []\n",
87 101
     "        for other_val in vals:\n",
88
-    "            score = scoreAspect((val, other_val))\n",
102
+    "            distance = euclideanDistance((val,other_val))\n",
103
+    "            score = (config[\"max_euclidean_distance\"] - distance) * config[\"magic\"] + 1515\n",
104
+    "            # score = scoreAspect((val, other_val))\n",
89 105
     "            adjusted_score = round(score)\n",
90 106
     "            m[val].append(adjusted_score)\n",
91 107
     "    return m\n",
@@ -120,14 +136,16 @@
120 136
     "        res[5], res[2],\n",
121 137
     "        res[4], res[5],\n",
122 138
     "        res[0], res[1],\n",
139
+    "        res[0], res[0],\n",
123 140
     "    ]\n",
124 141
     "    input_b = [\n",
125
-    "        res[4], res[4], # One aspect\n",
142
+    "        res[6], res[6], # One aspect\n",
126 143
     "        res[1], res[1],\n",
127 144
     "        res[5], res[2],\n",
128 145
     "        res[0], res[1],\n",
129 146
     "        res[3], res[5],\n",
130 147
     "        res[2], res[2],\n",
148
+    "        res[0], res[0],\n",
131 149
     "    ]\n",
132 150
     "    for i in range(round(input_a.__len__() / 2)):\n",
133 151
     "        print(score_aspect(input_a, input_b, m, xy_axis_vals))\n",
@@ -150,11 +168,8 @@
150 168
   }
151 169
  ],
152 170
  "metadata": {
153
-  "interpreter": {
154
-   "hash": "a4118c1262ac97709ca0d199809af279fe9249120a4ac5f6c92359d01f3f0cd0"
155
-  },
156 171
   "kernelspec": {
157
-   "display_name": "Python 3.7.10 64-bit ('base': conda)",
172
+   "display_name": "Python 3.10.6 64-bit",
158 173
    "language": "python",
159 174
    "name": "python3"
160 175
   },
@@ -168,9 +183,14 @@
168 183
    "name": "python",
169 184
    "nbconvert_exporter": "python",
170 185
    "pygments_lexer": "ipython3",
171
-   "version": "3.6.9"
186
+   "version": "3.10.6"
172 187
   },
173
-  "orig_nbformat": 4
188
+  "orig_nbformat": 4,
189
+  "vscode": {
190
+   "interpreter": {
191
+    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
192
+   }
193
+  }
174 194
  },
175 195
  "nbformat": 4,
176 196
  "nbformat_minor": 2

+ 2410
- 2409
backend/db/generated/prescore_matrix.json
Разлика између датотека није приказана због своје велике величине
Прегледај датотеку


Loading…
Откажи
Сачувај