Speech Recognition and pseudo AI

Bài đăng này đã không được cập nhật trong 6 năm

In this post I'm going to learn and write about something different from the previous post. Here is what we are going to do:

Use pyglet to play a sound track
Use text-to-speech software such as Festival in Linux to read out text
Make interaction between user and program
Make pseudo intelligence by getting answer from the web

Play audio file with pyglet

Let's make a folder for the project called speech_recognition. In this folder we are going to create a file called audio.py to play the sounds. We also need a couple of audio files from https://notificationsounds.com/notification-sounds?page=7 . Let's download the files suppressed.mp3 and wet.mp3 and put them in a subfolder called audio.

Let's write our audio.py to play the audios:

  import pyglet

  def exiter(dt):
    """Stop the pyglet application loop.

    Registered as a pyglet clock callback by play_audio; the `dt` argument
    is accepted but not used.
    """
    pyglet.app.exit()

  def play_audio(filename):
    """Play the media file `filename` and return once the app loop has exited.

    filename: resource path handed to pyglet.resource.media.
    """
    # You may need these two lines to play mp3 files correctly on Ubuntu.
    pyglet.lib.load_library('avbin')
    pyglet.have_avbin = True

    sound = pyglet.resource.media(filename)  # read the file from the resource path
    sound.play()

    # Ask the clock to call exiter after the track's duration, then enter
    # the app loop that exiter will stop.
    track_length = sound.duration
    pyglet.clock.schedule_once(exiter, track_length)
    pyglet.app.run()

  # Play both downloaded notification sounds, one after the other.
  for track in ('audio/wet.mp3', 'audio/suppressed.mp3'):
    play_audio(track)

If you run python3 audio.py in a terminal, you will hear both sounds.

Now that we can play mp3 files, we can use the two sounds above as the start and end tones for recording in speech recognition.

Use text-to-speech program Festival

We need to import the speech_recognition module to convert speech to text, and subprocess to run the Festival text-to-speech command from our script.

import pyglet
import pyaudio
import wave
import speech_recognition as sr
import subprocess

def say(text):
  """Speak `text` aloud by piping it into Festival's text-to-speech mode.

  The text is shell-quoted before being spliced into the command line, so
  punctuation or shell metacharacters in it are spoken instead of being
  interpreted by the shell.
  """
  import shlex  # local import: only this helper needs it

  subprocess.call("echo " + shlex.quote(text) + " | festival --tts", shell=True)

def exiter(dt):
  """Stop the pyglet application loop.

  Registered as a pyglet clock callback by play_audio; the `dt` argument
  is accepted but not used.
  """
  pyglet.app.exit()

def play_audio(filename):
  """Play the media file `filename` and return once the app loop has exited.

  filename: resource path handed to pyglet.resource.media.
  """
  # Load avbin explicitly before asking pyglet for the media file.
  pyglet.lib.load_library('avbin')
  pyglet.have_avbin = True

  sound = pyglet.resource.media(filename)
  sound.play()

  # Ask the clock to call exiter after the track's duration, then enter
  # the app loop that exiter will stop.
  track_length = sound.duration
  pyglet.clock.schedule_once(exiter, track_length)
  pyglet.app.run()


r = sr.Recognizer()  # module-level recognizer that initSpeech() listens and transcribes with

def initSpeech():
  """Record one spoken phrase, transcribe it and read it back aloud.

  Plays a start tone, listens on sr.Microphone(), plays an end tone, then
  sends the recording to r.recognize_google. If the recording cannot be
  transcribed, a message is printed and nothing is spoken.
  """
  print("Listening...")
  play_audio('audio/wet.mp3')

  with sr.Microphone() as source:  # initialize the microphone
    r.adjust_for_ambient_noise(source)
    print("Say Something")
    audio = r.listen(source)

  play_audio("audio/suppressed.mp3")

  try:
    command = r.recognize_google(audio)  # convert audio to text
  except (sr.UnknownValueError, sr.RequestError):
    # Catch only the recognizer's own failures so that Ctrl-C and real bugs
    # still surface instead of being swallowed.
    print("Couldn't understand you, bro")
    return

  print("Your command:")
  print(command)
  # Apostrophes are dropped before the text is handed on to say().
  command = command.replace("'", "")
  say("You said: " + command)  # make the program answer in speech

# Run a single listen-and-repeat cycle when the script is executed.
initSpeech()

Now you can make the program speak back to you.

Make interaction between user and program

We can make it even better by creating some interaction between the user and the program. Let's create another file called commands.py.

import subprocess
import os

class Commander:
  """Reacts to a recognized phrase: answers a name question or launches a program."""

  def __init__(self):
    # Words that confirm or cancel an action; the methods below do not
    # consult them yet.
    self.confirm = ["yes", "affirmative", "si", "sure", "ok", "do it", "yeah", "confirm", "of course", "certainly"]
    self.cancel = ["no", "negative", "never", "don't", "wait", "cancel"]

  def discover(self, text):
    """Inspect the recognized phrase `text` and act on what it asks for.

    A phrase containing "what" and "name" is answered aloud. A phrase
    containing "launch" or "open" runs everything after its first word as
    a program. Any other phrase is ignored.
    """
    if "what" in text and "name" in text:
      if "my" in text:
        self.respond("You havent told me your name yet")
      else:
        self.respond("My name is Artificial Intelligent. How are you?")

    # Each keyword needs its own membership test: `"launch" or "open" in text`
    # is always true because the non-empty literal "launch" is truthy, which
    # made every phrase get executed as a command.
    if "launch" in text or "open" in text:
      # Assumes the keyword is the first word; the remainder is the program.
      app = text.partition(" ")[2]
      if app:
        print(app)
        self._launch(app)

  def _launch(self, app):
    """Run `app` (program name plus arguments) without going through a shell."""
    import shlex  # local import so the file's import block stays untouched

    try:
      argv = shlex.split(app)
      if argv:
        # An argument list keeps `;`, `&&`, redirects etc. in recognized
        # speech from being interpreted as shell syntax.
        subprocess.call(argv)
    except (OSError, ValueError) as err:
      # OSError: program missing or not executable; ValueError: unbalanced quotes.
      print("Could not launch " + app + ": " + str(err))

  def respond(self, response):
    """Print `response` and speak it with the `say` command-line program."""
    import shlex

    print(response)
    # shlex.quote replaces the hand-written single quotes, which broke on any
    # apostrophe in the response.
    subprocess.call("say " + shlex.quote(response), shell=True)

We also make some changes to the file audio.py.

import pyglet
import pyaudio
import wave
import speech_recognition as sr
import subprocess
from commands import Commander

def say(response):
  """Speak `response` aloud with the `say` command-line program.

  The text is shell-quoted, so apostrophes and shell metacharacters in it
  are passed to `say` as plain text instead of breaking the command line.
  """
  import shlex  # local import: only this helper needs it

  subprocess.call("say " + shlex.quote(response), shell=True)

def exiter(dt):
  """Stop the pyglet application loop.

  Registered as a pyglet clock callback by play_audio; the `dt` argument
  is accepted but not used.
  """
  pyglet.app.exit()

def play_audio(filename):
  """Play the media file `filename` and return once the app loop has exited.

  filename: resource path handed to pyglet.resource.media.
  """
  # Load avbin explicitly before asking pyglet for the media file.
  pyglet.lib.load_library('avbin')
  pyglet.have_avbin = True

  sound = pyglet.resource.media(filename)
  sound.play()

  # Ask the clock to call exiter after the track's duration, then enter
  # the app loop that exiter will stop.
  track_length = sound.duration
  pyglet.clock.schedule_once(exiter, track_length)
  pyglet.app.run()

r = sr.Recognizer()  # recognizer that initSpeech() listens and transcribes with
cmd = Commander()  # command handler imported from commands.py
running = True  # flag tested by the main `while` loop at the bottom of the script

def initSpeech():
  """Record one spoken command, transcribe it and pass it to the Commander.

  Plays a start tone, listens on sr.Microphone(), plays an end tone, then
  sends the recording to r.recognize_google. Saying "quit" clears the
  module-level `running` flag. A recording that cannot be transcribed is
  reported aloud and not dispatched.
  """
  # `running` is assigned below; without this declaration the assignment
  # would create a local and the module-level flag would never change.
  global running

  print("Listening...")
  play_audio('audio/wet.mp3')

  with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)
    print("Say Something")
    audio = r.listen(source)

  play_audio("audio/suppressed.mp3")

  try:
    command = r.recognize_google(audio)
  except (sr.UnknownValueError, sr.RequestError):
    # Catch only the recognizer's own failures so that Ctrl-C and real bugs
    # still surface instead of being swallowed.
    say("Couldnt understand you, bro")
    return

  print("Your command:")
  print(command)
  if command == "quit":
    running = False
    return  # do not hand "quit" on to the Commander

  # Apostrophes are dropped before the text is handed on.
  command = command.replace("'", "")
  cmd.discover(command)

# Keep taking commands for as long as the `running` flag stays set.
while running:
  initSpeech()

Now we come to the most interesting part: web scraping.

Add web scraping

We are going to create another file called web_scraper.py. We will use the selenium and bs4 libraries to scrape the web.

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sys

class Fetcher:
  """Loads a URL in a PhantomJS browser session and extracts an answer from the page."""

  def __init__(self, url):
    """Start the browser session that lookup() will use to load `url`."""
    # NOTE(review): webdriver.PhantomJS is deprecated in newer Selenium
    # releases - confirm the installed version still provides it.
    self.driver = webdriver.PhantomJS()
    self.driver.wait = WebDriverWait(self.driver, 5)
    self.url = url

  def lookup(self):
    """Load the page and return the text of the first answer element.

    Returns "I don't know" when neither answer class is found on the page.
    The browser is shut down before returning, so call this once per Fetcher.
    """
    try:
      self.driver.get(self.url)  # the URL argument was missing before

      try:
        self.driver.wait.until(EC.presence_of_element_located(
          (By.CLASS_NAME, "gsfi")
        ))
      except TimeoutException:
        # Keep going: whatever has loaded so far is still parsed below.
        print("Failed, bro")

      page = self.driver.page_source
    finally:
      # Always release the browser, even if loading the page raised.
      self.driver.quit()

    # "html.parser" is the name of Python's built-in parser for bs4.
    soup = BeautifulSoup(page, "html.parser")

    # The answer can appear under more than one class; try the primary one
    # first and fall back to the alternative.
    answer = soup.find_all(class_="_sPg")
    if not answer:
      answer = soup.find_all(class_="_m3b")

    if not answer:
      return "I don't know"
    return answer[0].get_text()

We also make a small change in the file commands.py.

import subprocess
import os
import requests
from bs4 import BeautifulSoup
from web_scraper import Fetcher

class Commander:
  """Reacts to a recognized phrase: name question, program launch, or web lookup."""

  def __init__(self):
    # Words that confirm or cancel an action; the methods below do not
    # consult them yet.
    self.confirm = ["yes", "affirmative", "si", "sure", "ok", "do it", "yeah", "confirm", "of course", "certainly"]
    self.cancel = ["no", "negative", "never", "don't", "wait", "cancel"]

  def discover(self, text):
    """Inspect the recognized phrase `text` and act on what it asks for.

    A phrase containing "what" and "name" is answered directly. A phrase
    containing "launch" or "open" runs everything after its first word as
    a program. Any other non-empty phrase is looked up on the web and the
    answer is spoken. An empty phrase is ignored.
    """
    if not text:
      return  # nothing was recognized; do not search for an empty query

    if "what" in text and "name" in text:
      if "my" in text:
        self.respond("You havent told me your name yet")
      else:
        self.respond("My name is Artificial Intelligent. How are you?")
    # Each keyword needs its own membership test: `"launch" or "open" in text`
    # is always true because the non-empty literal "launch" is truthy. The
    # check also sits before the web lookup so that a launch command is not
    # searched for as well.
    elif "launch" in text or "open" in text:
      # Assumes the keyword is the first word; the remainder is the program.
      app = text.partition(" ")[2]
      if app:
        print(app)
        self._launch(app)
    else:
      from urllib.parse import quote_plus  # local import: only this branch needs it

      # Encode the phrase so spaces and punctuation form a valid query string.
      f = Fetcher("https://www.google.com.kh/search/?q=" + quote_plus(text))
      answer = f.lookup()
      self.respond(answer)

  def _launch(self, app):
    """Run `app` (program name plus arguments) without going through a shell."""
    import shlex  # local import so the file's import block stays untouched

    try:
      argv = shlex.split(app)
      if argv:
        # An argument list keeps `;`, `&&`, redirects etc. in recognized
        # speech from being interpreted as shell syntax.
        subprocess.call(argv)
    except (OSError, ValueError) as err:
      # OSError: program missing or not executable; ValueError: unbalanced quotes.
      print("Could not launch " + app + ": " + str(err))

  def respond(self, response):
    """Print `response` and speak it with the `say` command-line program."""
    import shlex

    print(response)
    # shlex.quote replaces the hand-written single quotes, which broke on any
    # apostrophe in the response (for example "I don't know").
    subprocess.call("say " + shlex.quote(response), shell=True)