99import odml .tools .xmlparser
1010from hashlib import md5
1111py3 = True
12-
1312try :
1413 from urllib .request import urlopen
1514except ImportError :
1615 from urllib import urlopen
17-
1816import threading
1917
# Cached terminology files are considered stale after this interval.
CACHE_AGE = datetime.timedelta(days=14)
# Directory in the system temp folder that holds the downloaded files.
CACHE_DIR = os.path.join(tempfile.gettempdir(), "odml.cache")
# Maps cached file names to the urls they were downloaded from.
FILE_MAP_FILE = os.path.join(CACHE_DIR, "odml_filemap.csv")

if not os.path.exists(CACHE_DIR):
    try:
        os.makedirs(CACHE_DIR)
    except OSError:  # might happen due to concurrency
        if not os.path.exists(CACHE_DIR):
            raise
28+
29+
def open_file_map(path=None):
    """
    Read the file-map file stored in the cache that maps cached file names
    to the urls of the respective terminologies.

    Each line is expected to have the form "<filename>; <url>". Lines
    without a ';' separator (e.g. blank lines) are skipped instead of
    raising an IndexError. If the same file name occurs more than once,
    the last entry wins.

    @param path optional path of the file map to read; defaults to
           FILE_MAP_FILE in the cache directory.
    @return dict mapping file name -> url; empty if the file does not exist.
    """
    if path is None:
        path = FILE_MAP_FILE
    file_map = {}
    if not os.path.exists(path):
        return file_map
    with open(path, 'r') as f:
        for line in f:
            parts = line.strip().split(';')
            if len(parts) < 2:
                continue  # skip blank or malformed lines
            file_map[parts[0].strip()] = parts[1].strip()
    return file_map
2144
2245
def cache_load(url):
    """
    Load the url and store it in a temporary cache directory;
    subsequent requests for this url will use the cached version.

    @param url the url of the terminology to load
    @return an open file object on the cached copy, or None if the
            download failed.
    """
    filename = md5(url.encode()).hexdigest() + '__' + os.path.basename(url)
    cache_file = os.path.join(CACHE_DIR, filename)

    # (Re-)download when there is no cached copy yet, or the copy is older
    # than CACHE_AGE.
    if not os.path.exists(cache_file) \
            or datetime.datetime.fromtimestamp(os.path.getmtime(cache_file)) < \
            datetime.datetime.now() - CACHE_AGE:
        try:
            data = urlopen(url).read()
            if py3:
                data = data.decode("utf-8")
        except Exception as e:
            print("Failed loading '%s': %s" % (url, e))
            return
        # 'with' guarantees the cache file is closed even if write() fails.
        with open(cache_file, "w") as fp:
            fp.write(data)
        # NOTE(review): appending on every refresh can leave duplicate
        # entries for the same file; open_file_map() keeps the last one.
        with open(FILE_MAP_FILE, 'a') as fm:
            fm.write(filename + "; " + url + "\n")
    return open(cache_file)
5168
5269
def cached_files():
    """
    Return the names of all locally cached terminology files, i.e. the
    ".xml" files found in the cache directory.
    """
    names = []
    for entry in os.listdir(CACHE_DIR):
        if entry.endswith(".xml") and os.path.isfile(os.path.join(CACHE_DIR, entry)):
            names.append(entry)
    return names
77+
78+
def show_cache():
    """
    Print a table of all locally cached terminology files together with
    the time each was last updated. For display only.
    """
    print("terminology %s \t updated" % (19 * " "))
    print(60 * "-")
    for name in cached_files():
        path = os.path.join(CACHE_DIR, name)
        updated = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        # strip the leading md5 prefix ("<hash>__") from the display name
        label = '_'.join(name.split('__')[1:])
        if len(label) > 30:
            label = label[:16] + "..."
        # pad to a fixed column width (equivalent to the manual padding)
        label = label.ljust(30)
        print(" %s \t %s" % (label, updated))
95+
96+
def clear_cache():
    """
    Delete all locally cached terminology files and the file map.
    The cache folder itself is left in place.
    """
    for name in cached_files():
        os.remove(os.path.join(CACHE_DIR, name))
    if os.path.exists(FILE_MAP_FILE):
        os.remove(FILE_MAP_FILE)
106+
107+
def from_cache(term):
    """
    Fill the given terminology collection with the definitions stored in
    the local cache.

    @param term the Terminologies instance to populate
    """
    assert isinstance(term, Terminologies)
    # The file map records which urls have been cached; load every url
    # that is not already present in the collection.
    file_map = open_file_map()
    for url in file_map.values():
        if url not in term:
            term.load(url)
118+
119+
53120class Terminologies (dict ):
54121 loading = {}
122+ types = None
55123
56- def load (self , url ):
124+ def load (self , url = "http://portal.g-node.org/odml/terminologies/v1.0/terminologies.xml" ):
57125 """
58126 load and cache a terminology-url
59127
@@ -62,11 +130,14 @@ def load(self, url):
62130 if url in self :
63131 return self [url ]
64132
133+ encode_name = md5 (url .encode ()).hexdigest () + '__' + os .path .basename (url )
134+ if encode_name in self :
135+ return self [encode_name ]
136+
65137 if url in self .loading :
66138 self .loading [url ].join ()
67139 self .loading .pop (url , None )
68140 return self .load (url )
69-
70141 return self ._load (url )
71142
72143 def _load (self , url ):
@@ -95,14 +166,129 @@ def deferred_load(self, url):
95166 self .loading [url ] = threading .Thread (target = self ._load , args = (url ,))
96167 self .loading [url ].start ()
97168
169+ def empty (self ):
170+ """
171+ Tells whether there are no terminolgies stored.
172+ """
173+ return len (self ) == 0
174+
175+ def type_list (self ):
176+ """
177+ returns a dict of all types stored in the cache together with the terminologies it is defined in.
178+ """
179+ if self .empty ():
180+ from_cache (self )
181+ if not self .types :
182+ self .types = {}
183+ for k in self .items ():
184+ for s in k [1 ].itersections ():
185+ if s .type in self .types :
186+ self .types [s .type ].append ((k [0 ], s .get_path ()))
187+ else :
188+ self .types [s .type ] = [(k [0 ], s .get_path ())]
189+ return self .types
190+
191+ def _compare_repo (self , candidate_repo , candidate_path , pattern , relaxed ):
192+ parts = pattern .lower ().split ()
193+ match = True
194+ repo = candidate_repo .lower ()
195+ path = candidate_path .lower ()
196+ for p in parts :
197+ if p .startswith ("!" ):
198+ if relaxed :
199+ match = match or (p [1 :] not in repo .lower () and p [1 :] not in path )
200+ else :
201+ match = match and (p [1 :] not in repo and p [1 :] not in path )
202+ else :
203+ if relaxed :
204+ match = match or (p in repo or p in path )
205+ else :
206+ match = match and (p in repo or p in path )
207+ return match
208+
209+ def _find_match (self , type_matches , pattern , relaxed = False ):
210+ if pattern :
211+ matches = []
212+ for i , (r , p ) in enumerate (type_matches ):
213+ if self ._compare_repo (r , p , pattern , relaxed ):
214+ matches .append (type_matches [i ])
215+ return matches
216+ else : # simply return first
217+ return type_matches
218+ return []
219+
220+ def _get_section_by_type (self , section_type , pattern = None , relaxed = False , find_all = False ):
221+ if self .empty () or len (self .types ) == 0 :
222+ self .type_list ()
223+ matches = []
224+ if section_type in self .types :
225+ matches = self ._find_match (self .types [section_type ], pattern , relaxed )
226+ if len (matches ) > 0 :
227+ if len (matches ) > 1 and find_all :
228+ sections = []
229+ for m in matches :
230+ sections .append (self [m [0 ]].get_section_by_path (m [1 ]).clone ())
231+ return sections
232+ else :
233+ return self [matches [0 ][0 ]].get_section_by_path (matches [0 ][1 ]).clone ()
234+ else :
235+ return None
236+
237+
98238terminologies = Terminologies ()
99239load = terminologies .load
100240deferred_load = terminologies .deferred_load
101241
102242
def get_section_by_type(section_type, pattern=None, relaxed=False, find_all=False):
    """
    Find a section type in the cached repositories and return it.

    @param section_type the type of the section; must be a valid full
           match. The first match is returned unless find_all is set.
    @param pattern an optional filter string with characteristics
           regarding the repository the section should originate from
           and its path in the file (see below)
    @param relaxed optional, defines whether all criteria must be met
    @param find_all optional, sets whether all possible matches are
           returned

    @return Section or list of sections depending on the find_all
            parameter, None if no match was found.

    Example:
    Suppose we are looking for a section type 'analysis' from the g-node
    terminologies:
        s = get_section_by_type("analysis", "g-node")
        print(s)
        <Section Analysis[analysis] (0)>
    To exclude the g-node terminologies, put an ! in front of the pattern:
        s = get_section_by_type("analysis", "!g-node")
    Multiple criteria can be combined, e.g.
        get_section_by_type("setup/daq", "g-node blackrock !cerebus")
    The relaxed parameter controls whether all criteria have to match.
    """
    return terminologies._get_section_by_type(section_type,
                                              pattern=pattern,
                                              relaxed=relaxed,
                                              find_all=find_all)
271+
def find_definitions(section_type):
    """
    Find the repositories that define the provided section type.

    @param section_type the requested section type
    @return list of (repository, path) tuples locating the respective
            section definitions; may be empty.
    """
    return terminologies.type_list().get(section_type, [])
286+
if __name__ == "__main__":
    # Interactive exploration / debugging entry point.
    from IPython import embed
    print("Terminologies!")
    from_cache(terminologies)
    # t.load('http://portal.g-node.org/odml/terminologies/v1.0/terminologies.xml')
    # t.load('http://portal.g-node.org/odml/terminologies/v1.0/analysis/power_spectrum.xml')
    find_definitions("analysis")
    embed()
# (removed stray web-page residue: "0 commit comments")