
Differences between the current version of Infomer and the predecessor of its previous major change.


Newer page: version 9 Last edited on Wednesday, December 2, 2009 12:28:54 am by FirstnameLastname
Older page: version 7 Last edited on Saturday, September 11, 2004 3:19:09 pm by CraigBox
@@ -16,56 +16,64 @@
  
 !!Parse_line 
  
 This function takes a line and splits it up into sentences, doing some munging along the way. It should also figure out whether the line is a directed message and, if so, who it is directed at. For example:
+<pre>  
  <Isomer> Infomer: you suck 
+</pre>  
 should figure out that the sentence "you suck" is directed at Infomer. It doesn't have to figure out what "you" is; parse_sentence does that. It should, however, split compound sentences up and convert them into multiple sentences. eg:
+<pre>  
  <Isomer> Isomer is cool, and is funky 
+</pre>  
 should be split up into "Isomer is cool" and "Isomer is funky" (with who being "Isomer" and the target being None). Punctuation in general should be stripped.
  
+<verbatim>  
  def parse_line(who,text): 
- tm = x = re.match(r"(?P<string>\w+)\: ([ [\w\' ]+)", text) 
+ tm = re.match(r"(?P<string>\w+)\: ([\w\' ]+)", text) 
  
- ntext = ""  
- first = 1 
+ ntext = ""  
+ first = 1 
  
- if not tm:  
- target = None  
- line = text 
+ if not tm:  
+ target = None  
+ line = text 
  
- else:  
- target = tm.! group(1)  
- line = tm.! group(2) 
+ else:  
+ target = tm.group(1)  
+ line = tm.group(2) 
  
- # Now with the target out of the way, begin stripping prefixes and conjunctions etc. 
+ # Now with the target out of the way, begin stripping prefixes and conjunctions etc. 
  
- delims = [ ["?", ".", "and", ","] 
+ delims = ["?", ".", "and", ","] 
  
- for d in delims:  
- line = string .replace(line, d, ".") 
+ for d in delims:  
+ line = line.replace(d, ".") 
  
- sentences = string .split(line, ".") 
+ sentences = line.split(".") 
  
- for s in sentences: 
+ for s in sentences: 
  
- words = string .split(s, " ") 
+ words = s.split(" ") 
  
- for p in prefixes:  
- if p in words:  
- words.remove(p) 
+ for p in prefixes:  
+ if p in words:  
+ words.remove(p) 
  
- if first == 1:  
- first = 0 
+ if first == 1:  
+ first = 0 
  
+ ntext = " ".join(words)  
  
- ntext = string.join (words , " "
+ parse_sentence(who, target, ntext)  
+</verbatim>  
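+For illustration, here is a minimal standalone sketch of the delimiter munging above (a hypothetical demo, not part of the bot): conjunctions and punctuation all collapse to "." before splitting. Re-attaching the subject to the second sentence ("Isomer is funky") is left to the surrounding loop. 
+<verbatim> 
+ delims = ["?", ".", "and", ","] 
+ line = "Isomer is cool, and is funky" 
+ for d in delims: 
+     line = line.replace(d, ".") 
+ # line is now "Isomer is cool. . is funky" 
+ sentences = [s.strip() for s in line.split(".") if s.strip()] 
+ # sentences == ['Isomer is cool', 'is funky'] 
+</verbatim> 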
  
- parse_sentence(who, target, ntext)  
-  
- !!Parse_sentance  
+!!Parse_sentence  
 This function's job is to take a sentence and clean it up: replacing "you are" with the target's name plus "is", removing little words, and rearranging sentences so they make more sense to the bot. For example, this function should be able to take:
+<pre>  
  (who=Isomer,target=Infomer) "You are a very stupid bot" 
+</pre>  
 and turn it into 
+<verbatim>  
  (who=Isomer,target=Infomer) "Infomer is very stupid" 
  (who=Isomer,target=Infomer) "Infomer is a bot" 
  
  def parse_sentence(speaker, target, sentence): 
@@ -77,134 +85,149 @@
  # use lower-case since we map user text to lower-case for the comparison :) 
  
  replacements = [ 
  # abbreviations - case sensitivity? 
- ([ ["you're"], [ ["you", "are"]),  
- ([ ["I'm"], [ ["I", "am"]),  
- ([ ["It's"], [ ["It", "is"]),  
- ([ ["it's"], [ ["it", "is"]),  
- ([ ["I", "am"], [ [speaker, "is"]),  
- ([ ["my"], [ [speaker + "'s"]), 
+ (["you're"], ["you", "are"]),  
+ (["I'm"], ["I", "am"]),  
+ (["It's"], ["It", "is"]),  
+ (["it's"], ["it", "is"]),  
+ (["I", "am"], [speaker, "is"]),  
+ (["my"], [speaker + "'s"]), 
 ] 
  
- if ( target != None)
+ if target != None: 
 replacements.extend([ 
 
- ([ ["you", "are"], [ [target, "is"]),  
- ([ ["are", "you"], [ ["is", target]),  
- ([ ["your"], [ [target + "'s"]),  
- ### bad idea? ([ ["you"], [ [target]), # catch-all 
+ (["you", "are"], [target, "is"]),  
+ (["are", "you"], ["is", target]),  
+ (["your"], [target + "'s"]),  
+ ### bad idea? (["you"], [target]), # catch-all 
 ]) 
 
- unparsed_tokens = string .split(sentence )  
- parsed_tokens = [ [] 
+ unparsed_tokens = sentence.split()  
+ parsed_tokens = [] 
  
- while ( len(unparsed_tokens) > 0 ) : # assume len() is evaluated each time 
+ while len(unparsed_tokens) > 0: # assume len() is evaluated each time 
  for pair in replacements: 
  made_expansion = 0 
  
- term_len = len(pair[ [0])  
- if ( len(unparsed_tokens) >= term_len and  
- map(string .lower,unparsed_tokens[[ :term_len]) == pair[ [0]) : 
+ term_len = len(pair[0])  
+ if len(unparsed_tokens) >= term_len and \  
+ map(str.lower, unparsed_tokens[:term_len]) == pair[0]: 
  # replace match with replacement 
- unparsed_tokens = pair[ [1] + unparsed_tokens[ [term_len:] 
+ unparsed_tokens = pair[1] + unparsed_tokens[term_len:] 
  made_expansion = 1 
  break 
  
- if ( made_expansion == 0 ) : 
+ if made_expansion == 0: 
  # we couldn't make any expansions at this point... 
  parsed_tokens.append( unparsed_tokens.pop(0) ) 
  
  
  parse_phrase(speaker, parsed_tokens) 
+</verbatim>  
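+As a self-contained illustration of the replacement loop (a hypothetical demo with a single replacement pair, written with a list comprehension instead of map() so it also runs under Python 3): 
+<verbatim> 
+ replacements = [(["you", "are"], ["Infomer", "is"])] 
+ tokens = "you are a very stupid bot".split() 
+ parsed = [] 
+ while len(tokens) > 0: 
+     for old, new in replacements: 
+         if [t.lower() for t in tokens[:len(old)]] == old: 
+             # splice the replacement in and rescan from the same spot 
+             tokens = new + tokens[len(old):] 
+             break 
+     else: 
+         # nothing matched here; emit one token and move on 
+         parsed.append(tokens.pop(0)) 
+ # parsed == ['Infomer', 'is', 'a', 'very', 'stupid', 'bot'] 
+</verbatim> 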
  
 !!parse_phrase 
 parse_phrase takes a sentence and calls the database primitives on it. 
  
+<verbatim>  
  def parse_phrase(who,text): 
- for i in questions:  
- if i==text[ [0].lower():  
- obj=Object(text[ [2:])  
- return get_fact(obj,text[ [1]) 
+ for i in questions:  
+ if i==text[0].lower():  
+ obj=Object(text[2:])  
+ return get_fact(obj,text[1]) 
  
- first=len(text)  
- first_verb=None  
- for i in verbs:  
- if i in text and text.index(i)<first:  
- first=text.index(i)  
- first_verb=i 
+ first=len(text)  
+ first_verb=None  
+ for i in verbs:  
+ if i in text and text.index(i)<first:  
+ first=text.index(i)  
+ first_verb=i 
  
- # Not a recognised statement  
- if first_verb==None:  
- return "" 
+ # Not a recognised statement  
+ if first_verb==None:  
+ return "" 
  
- # split into two halves and a verb, eg:  
- # Perry's hair is very cool -> (Perry's Hair,is,Very Cool)  
- lhs = text[ [:first]  
- verb = text[ [first]  
- rhs = text[ [first+1:]  
- if " ".join(lhs).lower() in fake_lhs:  
- return "" 
+ # split into two halves and a verb, eg:  
+ # Perry's hair is very cool -> (Perry's Hair,is,Very Cool)  
+ lhs = text[:first]  
+ verb = text[first]  
+ rhs = text[first+1:]  
+ if " ".join(lhs).lower() in fake_lhs:  
+ return "" 
  
- obj=Object(lhs)  
- add_fact(obj,verb,parse_definition(verb,rhs)) 
+ obj=Object(lhs)  
+ add_fact(obj,verb,parse_definition(verb,rhs)) 
  
- return "" 
+ return ""  
+</verbatim>  
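+A hypothetical walk-through of the lhs/verb/rhs split (the verbs list here is assumed for the demo; the real one lives elsewhere in the bot): 
+<verbatim> 
+ verbs = ["is", "are", "was", "were"]  # assumed for this demo 
+ text = "Perry's hair is very cool".split() 
+ first = min(text.index(v) for v in verbs if v in text) 
+ lhs, verb, rhs = text[:first], text[first], text[first+1:] 
+ # lhs == ["Perry's", 'hair'], verb == 'is', rhs == ['very', 'cool'] 
+</verbatim> 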
  
 !!Misc functions 
-This function removes a prefix from a sentance  
+This function removes a prefix from a sentence  
+<verbatim>  
  def remove_prefix(text): 
- prefixes = [ [  
- [ ["and"], [ ["also"], [ ["ah"], [ [ahh"], [ ["anyway"], [ ["apparently"],  
- [ ["although"], [ ["but"], [ ["bah"], [ ["besides"], [ ["no"], [ ["yes"],  
- [ ["yeah"] ]  
- flag=1  
- while flag==1:  
- flag=0  
- for i in prefixes:  
- if map(lambda x:string .lower(x) ,text[ [:!len(i)])==i:  
- text=text[ [len(i)+1:]  
- flag=1  
- return text 
+ prefixes = [  
+ ["and"], ["also"], ["ah"], [ahh"], ["anyway"], ["apparently"],  
+ ["although"], ["but"], ["bah"], ["besides"], ["no"], ["yes"],  
+ ["yeah"] ]  
+ flag=1  
+ while flag==1:  
+ flag=0  
+ for i in prefixes:  
+ if map(str.lower, text[:len(i)])==i:  
+ text=text[len(i):] # drop just the matched prefix  
+ flag=1  
+ return text  
+</verbatim>  
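+For example (hypothetical input; note the comparison relies on Python 2's map() returning a list): 
+<verbatim> 
+ remove_prefix("yeah but Infomer is cool".split()) 
+ # -> ['Infomer', 'is', 'cool'] 
+</verbatim> 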
  
 An object to hold information about an uh, object. 
  
+<verbatim>  
 # A class to hold an object's information 
  class Object: 
- def __init__(self,tokens):  
- self.tokens=[ []  
- token=""  
- # Join up words into tokens  
- # eg: Isomer's Left Foot's sole -> (Isomer,Left Foot,sole)  
- for i in tokens:  
- token=token+" "+i  
- if len(token)>2 and token[ [-2:]=="'s":  
- token=token[ [:-2]  
- self.tokens.append(token.strip())  
- token=""  
- # This intentionally adds empty token when it is ""  
- self.tokens.append(token.strip()) 
+ def __init__(self,tokens):  
+ self.tokens=[]  
+ token=""  
+ # Join up words into tokens  
+ # eg: Isomer's Left Foot's sole -> (Isomer,Left Foot,sole)  
+ for i in tokens:  
+ token=token+" "+i  
+ if len(token)>2 and token[-2:]=="'s":  
+ token=token[:-2]  
+ self.tokens.append(token.strip())  
+ token=""  
+ # This intentionally adds empty token when it is ""  
+ self.tokens.append(token.strip()) 
  
- def __repr__(self):  
- return `self.tokens` 
+ def __repr__(self):  
+ return `self.tokens`  
+</verbatim>  
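+For example (hypothetical usage): 
+<verbatim> 
+ Object("Isomer's Left Foot's sole".split()).tokens 
+ # -> ['Isomer', 'Left Foot', 'sole'] 
+</verbatim> 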
  
 Figures out if this is a special type of information 
+<verbatim>  
  def parse_definition(verb,text): 
- if text[ [0].lower() in [ ["a","an","the"]:  
- return (ISA,text[ [1:])  
- if " ".join(text[ [:2]).lower()=="known as":  
- return (AKA,text[ [2:])  
- if " ".join(text[ [:3]).lower()=="also known as":  
- return (AKA,text[ [3:])  
- return (NORMAL,text) 
+ if text[0].lower() in ["a","an","the"]:  
+ return (ISA,text[1:])  
+ if " ".join(text[:2]).lower()=="known as":  
+ return (AKA,text[2:])  
+ if " ".join(text[:3]).lower()=="also known as":  
+ return (AKA,text[3:])  
+ return (NORMAL,text)  
+</verbatim>  
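+For example (hypothetical calls; ISA and AKA are constants assumed to be defined elsewhere): 
+<verbatim> 
+ parse_definition("is", "a very stupid bot".split()) 
+ # -> (ISA, ['very', 'stupid', 'bot']) 
+ parse_definition("is", "also known as Isomer's bot".split()) 
+ # -> (AKA, ["Isomer's", 'bot']) 
+</verbatim> 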
  
 !!TODO 
 Parse: 
+<pre>  
  Infomer is a very stupid bot 
+</pre>  
 as 
+<pre>  
  Infomer is very stupid 
  Infomer is a bot 
+</pre>  
 requires knowledge of adverbs 'n stuff. Adverbs can mostly be detected by checking for "ly" on the end of words; a rough check is sketched below. 
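+A rough sketch of that heuristic (hypothetical; it misfires on words like "fly" or "only"): 
+<verbatim> 
+ def looks_like_adverb(word): 
+     # crude: most English adverbs end in "ly" 
+     return word.lower().endswith("ly") 
+</verbatim> 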
  
 Add: 
+<pre>  
  tell ''nick'' that ''message'' 
+</pre>  
 when ''nick'' next says something, say "''who'' wanted me to tell you that ''message''".