Speech Recognition and pseudo AI
Bài đăng này đã không được cập nhật trong 6 năm
In this post I'm going to learn and write about something different from the previous posts. What we are going to do here includes:
- Using pyglet to play a sound track
- Use text-to-speech software such as Festival in Linux to read out text
- Make interaction between user and program
- Make pseudo intelligence by getting answer from the web
Play audio file with pyglet
Let's make a folder for the project called speech_recognition.
In this folder we are going to create a file called audio.py to play the sounds. We also need a couple of audio files from https://notificationsounds.com/notification-sounds?page=7 . Let's download the files suppressed.mp3 and wet.mp3 and put them in a subfolder called audio.
Let's write our audio.py to play the audios:
import pyglet
def exiter(dt):
    """Stop the pyglet event loop.

    Used as a ``pyglet.clock.schedule_once`` callback; ``dt`` is the value
    the clock hands to the callback and is not used here.
    """
    event_loop = pyglet.app
    event_loop.exit()
def play_audio(filename):
    """Play one audio file and block until its duration has elapsed.

    ``filename`` is passed to ``pyglet.resource.media``. The pyglet event
    loop is started here and is stopped by ``exiter`` once the clip's
    duration has passed.
    """
    # Loading AVbin explicitly may be needed for mp3 playback on Ubuntu.
    pyglet.lib.load_library('avbin')
    pyglet.have_avbin = True

    clip = pyglet.resource.media(filename)  # read the file from the resource path
    clip.play()

    # Leave the event loop again after the clip's duration.
    pyglet.clock.schedule_once(exiter, clip.duration)
    pyglet.app.run()
# Play both demo sounds, one after the other.
for track in ('audio/wet.mp3', 'audio/suppressed.mp3'):
    play_audio(track)
If you run python3 audio.py in a terminal, you will hear both sounds.
Now that we can play mp3 files, we can use the two sounds above as the start and end cues for recording in speech recognition.
Use text-to-speech program Festival
We need to import the speech_recognition module to convert speech to text, and subprocess to run the Festival text-to-speech command from our script.
import pyglet
import pyaudio
import wave
import speech_recognition as sr
import subprocess
def say(text):
    """Read ``text`` aloud with the Festival text-to-speech program.

    The text is piped to ``festival --tts`` through the shell. It is quoted
    first, so quotes, ``;``, ``|`` and other shell metacharacters in the
    text are spoken instead of being interpreted by the shell.

    Args:
        text: The sentence to speak.
    """
    import shlex  # local import: only needed for quoting here

    subprocess.call("echo " + shlex.quote(text) + " | festival --tts", shell=True)
def exiter(dt):
    """Clock callback that ends the running pyglet application loop.

    ``dt`` is supplied by ``pyglet.clock.schedule_once`` and is ignored.
    """
    app = pyglet.app
    app.exit()
def play_audio(filename):
    """Play the audio resource ``filename`` and return once it is over.

    Runs the pyglet event loop and schedules ``exiter`` to stop it after the
    sound's duration.
    """
    pyglet.lib.load_library('avbin')
    pyglet.have_avbin = True

    sound = pyglet.resource.media(filename)
    sound.play()

    length = sound.duration
    pyglet.clock.schedule_once(exiter, length)
    pyglet.app.run()
# Recognizer instance used by initSpeech() to listen and transcribe.
r = sr.Recognizer() #initialize speech recognition
def initSpeech():
    """Record one utterance, transcribe it and speak it back.

    Plays a cue sound, listens on the microphone, plays a second cue, then
    sends the recording to Google's recognizer. The transcript (empty when
    recognition fails) is printed and read back through ``say``.
    """
    print("Listening...")
    play_audio('audio/wet.mp3')  # cue: recording starts
    with sr.Microphone() as source:  # initialize the microphone
        r.adjust_for_ambient_noise(source)
        print("Say Something")
        audio = r.listen(source)
    play_audio("audio/suppressed.mp3")  # cue: recording finished

    command = ""
    try:
        command = r.recognize_google(audio)  # convert audio to text
    except (sr.UnknownValueError, sr.RequestError):
        # Only recognition failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt and hide programming errors.
        print("Couldn't understand you, bro")

    print("Your command:")
    print(command)
    # Apostrophes are stripped before the text is handed to say().
    command = command.replace("'", "")
    say("You said: " + command)  # make the program answer in speech
# Run a single listen / transcribe / speak-back cycle.
initSpeech()
Now you can make the program speak back to you.
Make interaction between user and program
We can make it even better by creating some interaction between the user and the program. Let's create another file called commands.py.
import subprocess
import os
class Commander:
    """Turn recognized speech into spoken replies and application launches."""

    def __init__(self):
        # Confirmation / cancellation vocabularies; not referenced by the
        # methods of this class.
        self.confirm = ["yes", "affirmative", "si", "sure", "ok", "do it", "yeah", "confirm", "of course", "certainly"]
        self.cancel = ["no", "negative", "never", "don't", "wait", "cancel"]

    def discover(self, text):
        """Inspect ``text`` and react to it.

        Answers questions that mention "what" and "name", and runs the rest
        of the sentence as a command when it mentions "launch" or "open".

        Args:
            text: The recognized sentence.
        """
        if "what" in text and "name" in text:
            if "my" in text:
                self.respond("You havent told me your name yet")
            else:
                self.respond("My name is Artificial Intelligent. How are you?")

        # Each keyword needs its own membership test: `"launch" or "open" in
        # text` is always true because the non-empty string "launch" is truthy.
        if "launch" in text or "open" in text:
            app = text.split(" ", 1)[-1]  # everything after the first word
            print(app)
            # NOTE(review): this deliberately runs recognized speech as a
            # shell command; anything the recognizer hears can be executed.
            subprocess.call(app, shell=True)

    def respond(self, response):
        """Print ``response`` and speak it with the ``say`` command.

        Args:
            response: The sentence to print and speak.
        """
        import shlex  # local import: only needed for quoting here

        print(response)
        # Quote the text so apostrophes and shell metacharacters are spoken
        # rather than breaking or altering the command line.
        subprocess.call("say " + shlex.quote(response), shell=True)
We also make some changes to the file audio.py
.
import pyglet
import pyaudio
import wave
import speech_recognition as sr
import subprocess
from commands import Commander
def say(response):
    """Speak ``response`` with the ``say`` command-line program.

    The text is shell-quoted, so apostrophes and other shell metacharacters
    in it are spoken instead of breaking or altering the command line.

    Args:
        response: The sentence to speak.
    """
    import shlex  # local import: only needed for quoting here

    subprocess.call("say " + shlex.quote(response), shell=True)
def exiter(dt):
    """Exit the pyglet application loop.

    Called by the pyglet clock; the ``dt`` argument it passes is unused.
    """
    loop = pyglet.app
    loop.exit()
def play_audio(filename):
    """Play ``filename`` through pyglet and wait for it to finish.

    The event loop started here is stopped by ``exiter``, which is scheduled
    to fire after the media's duration.
    """
    pyglet.lib.load_library('avbin')
    pyglet.have_avbin = True

    media = pyglet.resource.media(filename)
    media.play()

    # Stop the event loop after the media's duration.
    pyglet.clock.schedule_once(exiter, media.duration)
    pyglet.app.run()
# Speech recognizer used by initSpeech() to listen and transcribe.
r = sr.Recognizer()
# Interprets each recognized command.
cmd = Commander()
# Main-loop flag: the loop at the bottom of the file runs while this is True.
running = True
def initSpeech():
    """Record one utterance and hand the transcript to the commander.

    Plays a cue sound, listens on the microphone, plays a second cue, then
    sends the recording to Google's recognizer. Saying "quit" clears the
    module-level ``running`` flag; any other transcript is passed to
    ``cmd.discover``. Nothing is dispatched when recognition fails.
    """
    # Without this declaration the assignment below would only create a
    # local variable and the main loop would never stop.
    global running

    print("Listening...")
    play_audio('audio/wet.mp3')  # cue: recording starts
    with sr.Microphone() as source:
        r.adjust_for_ambient_noise(source)
        print("Say Something")
        audio = r.listen(source)
    play_audio("audio/suppressed.mp3")  # cue: recording finished

    try:
        command = r.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Only recognition failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt and hide programming errors.
        say("Couldnt understand you, bro")
        return  # nothing was recognized, so there is nothing to dispatch

    print("Your command:")
    print(command)
    if command == "quit":
        running = False
        return  # do not treat "quit" as a command to interpret

    # Apostrophes are stripped before the text is passed on.
    command = command.replace("'", "")
    cmd.discover(command)
# Keep handling commands for as long as the running flag is set.
while running:
    initSpeech()
Now we come to the most interesting part: web scraping.
Add web scraping
We are going to create another file called web_scraper.py. We will use the selenium and bs4 libraries to scrape the web.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sys
class Fetcher:
    """Load a URL in a PhantomJS browser session and extract the answer text."""

    def __init__(self, url):
        """Start the browser session.

        Args:
            url: Address of the page that ``lookup`` loads.
        """
        # NOTE(review): recent Selenium releases no longer ship a PhantomJS
        # driver; confirm the installed Selenium version still provides it.
        self.driver = webdriver.PhantomJS()
        self.driver.wait = WebDriverWait(self.driver, 5)
        self.url = url

    def lookup(self):
        """Load the page and return the text of the first answer element.

        Returns:
            The text of the first element with class ``_sPg``, or failing
            that ``_m3b``; the string "I don't know" when neither is present.
        """
        try:
            self.driver.get(self.url)
            try:
                # Wait (up to the 5 s configured above) for the element with
                # class "gsfi" to appear before reading the page.
                self.driver.wait.until(EC.presence_of_element_located(
                    (By.CLASS_NAME, "gsfi")
                ))
            except TimeoutException:
                print("Failed, bro")
            # "html.parser" is the name of Python's built-in HTML parser.
            soup = BeautifulSoup(self.driver.page_source, "html.parser")
        finally:
            # Always shut the browser down, even if loading the page failed.
            self.driver.quit()

        answer = soup.find_all(class_="_sPg")
        # The answer can appear under more than one class; this is the
        # alternative one.
        if not answer:
            answer = soup.find_all(class_="_m3b")
        if not answer:
            return "I don't know"
        return answer[0].get_text()
We also make a small change in the file commands.py
.
import subprocess
import os
import requests
from bs4 import BeautifulSoup
from web_scraper import Fetcher
class Commander:
    """Turn recognized speech into spoken replies, web look-ups and launches."""

    def __init__(self):
        # Confirmation / cancellation vocabularies; not referenced by the
        # methods of this class.
        self.confirm = ["yes", "affirmative", "si", "sure", "ok", "do it", "yeah", "confirm", "of course", "certainly"]
        self.cancel = ["no", "negative", "never", "don't", "wait", "cancel"]

    def discover(self, text):
        """Inspect ``text`` and react to it.

        Questions that mention "what" and "name" get a canned reply; any
        other text is looked up through ``Fetcher`` and the answer is spoken.
        When the text mentions "launch" or "open", the rest of the sentence
        is additionally run as a command.

        Args:
            text: The recognized sentence.
        """
        if "what" in text and "name" in text:
            if "my" in text:
                self.respond("You havent told me your name yet")
            else:
                self.respond("My name is Artificial Intelligent. How are you?")
        else:
            from urllib.parse import quote_plus  # local import: used only here

            # Encode the query so spaces and symbols survive in the URL.
            f = Fetcher("https://www.google.com.kh/search/?q=" + quote_plus(text))
            answer = f.lookup()
            self.respond(answer)

        # Each keyword needs its own membership test: `"launch" or "open" in
        # text` is always true because the non-empty string "launch" is truthy.
        if "launch" in text or "open" in text:
            app = text.split(" ", 1)[-1]  # everything after the first word
            print(app)
            # NOTE(review): this deliberately runs recognized speech as a
            # shell command; anything the recognizer hears can be executed.
            subprocess.call(app, shell=True)

    def respond(self, response):
        """Print ``response`` and speak it with the ``say`` command.

        Args:
            response: The sentence to print and speak.
        """
        import shlex  # local import: only needed for quoting here

        print(response)
        # Quote the text so apostrophes (e.g. in "I don't know") and shell
        # metacharacters are spoken rather than breaking the command line.
        subprocess.call("say " + shlex.quote(response), shell=True)
Now you get a response from the web search when asking the program a question:
I hope you find this post interesting and useful. Any comments are welcome.
All rights reserved